merge from trunk

This commit is contained in:
ldolse 2011-01-19 09:06:04 +08:00
commit 7222eca39f
39 changed files with 532 additions and 368 deletions

View File

@ -1,6 +1,4 @@
@echo OFF
REM CalibreRun.bat
REM ~~~~~~~~~~~~~~
REM Batch File to start a Calibre configuration on Windows
REM giving explicit control of the location of:
REM - Calibre Program Files
@ -24,7 +22,10 @@ REM -------------------------------------
REM Set up Calibre Config folder
REM -------------------------------------
If EXIST CalibreConfig SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig
IF EXIST CalibreConfig (
SET CALIBRE_CONFIG_DIRECTORY=%cd%\CalibreConfig
ECHO CONFIG=%cd%\CalibreConfig
)
REM --------------------------------------------------------------
@ -38,24 +39,53 @@ REM drive letter of the USB stick.
REM Comment out any of the following that are not to be used
REM --------------------------------------------------------------
SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary
IF EXIST CalibreLibrary SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary
IF EXIST CalibreBooks SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks
IF EXIST U:\eBooks\CalibreLibrary (
SET CALIBRE_LIBRARY_DIRECTORY=U:\eBOOKS\CalibreLibrary
ECHO LIBRARY=U:\eBOOKS\CalibreLibrary
)
IF EXIST CalibreLibrary (
SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreLibrary
ECHO LIBRARY=%cd%\CalibreLibrary
)
IF EXIST CalibreBooks (
SET CALIBRE_LIBRARY_DIRECTORY=%cd%\CalibreBooks
ECHO LIBRARY=%cd%\CalibreBooks
)
REM --------------------------------------------------------------
REM Specify Location of metadata database (optional)
REM Specify Location of metadata database (optional)
REM
REM Location where the metadata.db file is located. If not set
REM the same location as Books files will be assumed. This
REM option is used to get better performance when the Library is
REM on a (slow) network drive. Putting the metadata.db file
REM locally gives a big performance improvement.
REM locally gives a big performance improvement.
REM
REM NOTE. If you use this option, then the ability to switch
REM libraries within Calibre will be disabled. Therefore
REM you do not want to set it if the metadata.db file
REM is at the same location as the book files.
REM --------------------------------------------------------------
IF EXIST CalibreBooks SET SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db
IF EXIST CalibreMetadata SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db
REM Use CalibreBooks\metadata.db as the metadata database override, but
REM only when CalibreBooks is not already the library folder: in that case
REM the override adds nothing and would only disable library switching.
IF EXIST CalibreBooks (
    IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreBooks" (
        SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreBooks\metadata.db
        ECHO DATABASE=%cd%\CalibreBooks\metadata.db
        ECHO '
        ECHO ***CAUTION*** Library Switching will be disabled
        ECHO '
    )
)
IF EXIST CalibreMetadata (
IF NOT "%CALIBRE_LIBRARY_DIRECTORY%" == "%cd%\CalibreMetadata" (
SET CALIBRE_OVERRIDE_DATABASE_PATH=%cd%\CalibreMetadata\metadata.db
ECHO DATABASE=%cd%\CalibreMetadata\metadata.db
ECHO '
ECHO ***CAUTION*** Library Switching will be disabled
ECHO '
)
)
REM --------------------------------------------------------------
REM Specify Location of source (optional)
@ -63,13 +93,20 @@ REM
REM It is easy to run Calibre from source
REM Just set the environment variable to where the source is located
REM When running from source the GUI will have a '*' after the version.
REM number that is displayed at the bottom of the Calibre main screen.
REM --------------------------------------------------------------
IF EXIST Calibre\src SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src
IF EXIST Calibre\src (
SET CALIBRE_DEVELOP_FROM=%cd%\Calibre\src
ECHO SOURCE=%cd%\Calibre\src
)
IF EXIST D:\Calibre\Calibre\src (
SET CALIBRE_DEVELOP_FROM=D:\Calibre\Calibre\src
ECHO SOURCE=D:\Calibre\Calibre\src
)
REM --------------------------------------------------------------
REM Specify Location of calibre binaries (optinal)
REM Specify Location of calibre binaries (optional)
REM
REM To avoid needing Calibre to be set in the search path, ensure
REM that Calibre Program Files is current directory when starting.
@ -78,21 +115,15 @@ REM This folder can be populated by cpying the Calibre2 folder from
REM an existing installation or by installing direct to here.
REM --------------------------------------------------------------
IF EXIST Calibre2 CD Calibre2
REM --------------------------------------------
REM Display settings that will be used
REM --------------------------------------------
echo PROGRAMS=%cd%
echo SOURCE=%CALIBRE_DEVELOP_FROM%
echo CONFIG=%CALIBRE_CONFIG_DIRECTORY%
echo LIBRARY=%CALIBRE_LIBRARY_DIRECTORY%
echo DATABASE=%CALIBRE_OVERRIDE_DATABASE_PATH%
REM Switch into the Calibre2 program folder when it exists. Variables
REM inside a parenthesised block are expanded when the block is parsed,
REM i.e. before the CD runs, so CALL is used to expand the current
REM directory a second time and report the folder actually entered.
IF EXIST Calibre2 (
    CD Calibre2
    CALL ECHO PROGRAMS=%%cd%%
)
REM ----------------------------------------------------------
REM The following gives a chance to check the settings before
REM starting Calibre. It can be commented out if not wanted.
REM ----------------------------------------------------------
echo "Press CTRL-C if you do not want to continue"
pause
@ -111,4 +142,4 @@ REM Use with /WAIT to wait until Calibre completes to run a task on exit
REM --------------------------------------------------------
echo "Starting up Calibre"
START /belownormal Calibre --with-library %CALIBRE_LIBRARY_DIRECTORY%
START /belownormal Calibre --with-library "%CALIBRE_LIBRARY_DIRECTORY%"

View File

@ -6,6 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
These templates control the content of titles displayed in the various sections
Available fields:
{title} Title of the book
{series} Series name
@ -14,6 +16,7 @@ __docformat__ = 'restructuredtext en'
{rating_parens} Rating, in parentheses
{pubyear} Year the book was published
{pubyear_parens} Year the book was published, in parentheses
'''
# Books by Author
by_authors_normal_title_template = '{title} {pubyear_parens}'

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.3 KiB

View File

@ -9,13 +9,14 @@ __docformat__ = 'restructuredtext en'
elpais.es
'''
from time import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class ElPais(BasicNewsRecipe):
__author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
description = 'Main daily newspaper from Spain'
cover_url = 'http://www.elpais.com/im/tit_logo_global.gif'
title = u'El Pais'
publisher = u'Ediciones El Pa\xeds SL'
category = 'News, politics, culture, economy, general interest'
@ -62,6 +63,6 @@ class ElPais(BasicNewsRecipe):
(u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
]
def print_version(self, url):
url = url+'?print=1'
return url
def get_cover_url(self):
return 'http://img5.kiosko.net/' + strftime("%Y/%m/%d") + '/es/elpais.750.jpg'

View File

@ -5,7 +5,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
class IHNed(BasicNewsRecipe):
stahnout_vsechny = False
stahnout_vsechny = True
#True = stahuje vsechny z homepage
#False = stahuje pouze dnesni clanky (ze dne, kdy je skript spusten)

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -23,6 +24,10 @@ class NYTimes(BasicNewsRecipe):
webEdition = False
oldest_article = 7
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
# previous paid versions of the new york times to be sent to the back issues folder on the kindle
replaceKindleVersion = False
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
@ -94,6 +99,10 @@ class NYTimes(BasicNewsRecipe):
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
elif replaceKindleVersion:
title='The New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
else:
title='New York Times'
description = 'Today\'s New York Times'
@ -150,6 +159,11 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
'entry entry-utility', #added for DealBook
'entry-tags', #added for DealBook
'footer promos clearfix', #added for DealBook
'footer links clearfix', #added for DealBook
'inlineImage module', #added for DealBook
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
@ -183,6 +197,9 @@ class NYTimes(BasicNewsRecipe):
'side_index',
'side_tool',
'toolsRight',
'skybox', #added for DealBook
'TopAd', #added for DealBook
'related-content', #added for DealBook
]),
dict(name=['script', 'noscript', 'style','form','hr'])]
no_stylesheets = True
@ -237,7 +254,7 @@ class NYTimes(BasicNewsRecipe):
def exclude_url(self,url):
if not url.startswith("http"):
return True
if not url.endswith(".html"):
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
return True
if 'nytimes.com' not in url:
return True
@ -560,7 +577,6 @@ class NYTimes(BasicNewsRecipe):
def preprocess_html(self, soup):
if self.webEdition & (self.oldest_article>0):
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
@ -583,128 +599,168 @@ class NYTimes(BasicNewsRecipe):
img_div = soup.find('div','inlineImage module')
if img_div:
img_div.extract()
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):
try:
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
except:
self.log("ERROR: One picture per article in postprocess_html")
try:
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
except:
self.log("ERROR: Problem in change captions to italic")
try:
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
else:
# Blog entry - replace headline, remove <hr> tags
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
except:
self.log("ERROR: One picture per article in postprocess_html")
try:
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
except:
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
try:
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
except:
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
return soup
try:
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
except:
self.log("ERROR: Problem in change captions to italic")
try:
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
blogheadline = str(h1) #added for dealbook
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
elif blogheadline.find('entry-title'):#added for dealbook
tag = Tag(soup, "h2")#added for dealbook
tag['class'] = "headline"#added for dealbook
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
h1.replaceWith(tag)#added for dealbook
else:
# Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.renderContents()))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
try:
#if this is from a blog (dealbook), fix the byline format
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
if bylineauthor:
tag = Tag(soup, "h6")
tag['class'] = "byline"
tag.insert(0, self.fixChars(bylineauthor.renderContents()))
bylineauthor.replaceWith(tag)
except:
self.log("ERROR: fixing byline author format")
try:
#if this is a blog (dealbook) fix the credit style for the pictures
blogcredit = soup.find('div',attrs={'class':'credit'})
if blogcredit:
tag = Tag(soup, "h6")
tag['class'] = "credit"
tag.insert(0, self.fixChars(blogcredit.renderContents()))
blogcredit.replaceWith(tag)
except:
self.log("ERROR: fixing credit format")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
#remove the <strong> update tag
blogupdated = soup.find('span', {'class':'update'})
if blogupdated:
blogupdated.replaceWith("")
except:
self.log("ERROR: Removing strong tag")
try:
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
except:
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
try:
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
except:
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
return soup
def populate_article_metadata(self, article, soup, first):
shortparagraph = ""
try:

View File

@ -36,7 +36,7 @@ class ANDROID(USBMS):
# Google
0x18d1 : { 0x4e11 : [0x0100, 0x226, 0x227], 0x4e12: [0x0100, 0x226,
0x227], 0x4e21: [0x0100, 0x226, 0x227]},
0x227], 0x4e21: [0x0100, 0x226, 0x227], 0xb058: [0x0222]},
# Samsung
0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
@ -64,12 +64,13 @@ class ANDROID(USBMS):
EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(EBOOK_DIR_MAIN)
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS']
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S']
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S']

View File

@ -193,6 +193,9 @@ class LUMIREAD(USBMS):
THUMBNAIL_HEIGHT = 200
VENDOR_NAME = 'ACER'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'LUMIREAD_600'
def upload_cover(self, path, filename, metadata, filepath):
if metadata.thumbnail and metadata.thumbnail[-1]:
cfilepath = filepath.replace('/', os.sep)

View File

@ -229,7 +229,11 @@ class PRS505(USBMS):
debug_print('PRS505: not uploading cover')
return
debug_print('PRS505: uploading cover')
self._upload_cover(path, filename, metadata, filepath)
try:
self._upload_cover(path, filename, metadata, filepath)
except:
import traceback
traceback.print_exc()
def _upload_cover(self, path, filename, metadata, filepath):
if metadata.thumbnail and metadata.thumbnail[-1]:

View File

@ -483,29 +483,29 @@ OptionRecommendation(name='pubdate',
OptionRecommendation(name='timestamp',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the book timestamp (used by the date column in calibre).')),
OptionRecommendation(name='enable_heuristics',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Enable heurisic processing. This option must be set for any '
help=_('Enable heuristic processing. This option must be set for any '
'heuristic processing to take place.')),
OptionRecommendation(name='markup_chapter_headings',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Detect unformatted chapter headings and sub headings. Change '
help=_('Detect unformatted chapter headings and sub headings. Change '
'them to h2 and h3 tags. This setting will not create a TOC, '
'but can be used in conjunction with structure detection to create '
'one.')),
OptionRecommendation(name='italicize_common_cases',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Look for common words and patterns that denote '
'italics and italicize them.')),
OptionRecommendation(name='fix_indents',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Turn indentation created from multiple non-breaking space entities '
'into CSS indents.')),
OptionRecommendation(name='html_unwrap_factor',
recommended_value=0.40, level=OptionRecommendation.LOW,
help=_('Scale used to determine the length at which a line should '
@ -513,31 +513,31 @@ OptionRecommendation(name='html_unwrap_factor',
'default is 0.4, just below the median line length. If only a '
'few lines in the document require unwrapping this value should '
'be reduced')),
OptionRecommendation(name='unwrap_lines',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Unwrap lines using punctuation and other formatting clues.')),
OptionRecommendation(name='delete_blank_paragraphs',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Remove empty paragraphs from the document when they exist between '
'every other paragraph')),
OptionRecommendation(name='format_scene_breaks',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('left aligned scene break markers are center aligned. '
help=_('Left aligned scene break markers are center aligned. '
'Replace soft scene breaks that use multiple blank lines with'
'horizontal rules.')),
OptionRecommendation(name='dehyphenate',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Analyses hyphenated words throughout the document. The '
help=_('Analyze hyphenated words throughout the document. The '
'document itself is used as a dictionary to determine whether hyphens '
'should be retained or removed.')),
OptionRecommendation(name='renumber_headings',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Looks for occurences of sequential <h1> or <h2> tags. '
help=_('Looks for occurrences of sequential <h1> or <h2> tags. '
'The tags are renumbered to prevent splitting in the middle '
'of chapter headings.')),
@ -545,10 +545,10 @@ OptionRecommendation(name='sr1_search',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Search pattern (regular expression) to be replaced with '
'sr1-replace.')),
OptionRecommendation(name='sr1_replace',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Replace characters to replace the text found with sr1-search.')),
help=_('Replacement to replace the text found with sr1-search.')),
OptionRecommendation(name='sr2_search',
recommended_value='', level=OptionRecommendation.LOW,
@ -557,7 +557,7 @@ OptionRecommendation(name='sr2_search',
OptionRecommendation(name='sr2_replace',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Replace characters to replace the text found with sr2-search.')),
help=_('Replacement to replace the text found with sr2-search.')),
OptionRecommendation(name='sr3_search',
recommended_value='', level=OptionRecommendation.LOW,
@ -566,7 +566,7 @@ OptionRecommendation(name='sr3_search',
OptionRecommendation(name='sr3_replace',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Replace characters to replace the text found with sr3-search.')),
help=_('Replacement to replace the text found with sr3-search.')),
]
# }}}

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
import functools, re
from calibre import entity_to_unicode
from calibre import entity_to_unicode, as_unicode
XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
SVG_NS = 'http://www.w3.org/2000/svg'
@ -463,7 +463,8 @@ class HTMLPreProcessor(object):
replace_txt = ''
rules.insert(0, (search_re, replace_txt))
except Exception as e:
self.log.error('Failed to parse %s regexp because %s' % (search, e))
self.log.error('Failed to parse %r regexp because %s' %
(search, as_unicode(e)))
end_rules = []
# delete soft hyphens - moved here so it's executed after header/footer removal

View File

@ -54,7 +54,7 @@ class HeuristicProcessor(object):
return '<'+styles+' style="page-break-before:always">'+chap
def analyze_title_matches(self, match):
chap = match.group('chap')
#chap = match.group('chap')
title = match.group('title')
if not title:
self.chapters_no_title = self.chapters_no_title + 1
@ -102,8 +102,7 @@ class HeuristicProcessor(object):
min_lns = tot_ln_fds * percent
#self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
if min_lns > tot_htm_ends:
return True
return min_lns > tot_htm_ends
def dump(self, raw, where):
import os
@ -136,7 +135,7 @@ class HeuristicProcessor(object):
'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
'Mlle.', 'Mons.', 'PS.', 'PPS.',
]
ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
@ -150,7 +149,7 @@ class HeuristicProcessor(object):
r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
]
for word in ITALICIZE_WORDS:
html = html.replace(word, '<i>%s</i>' % word)
@ -242,7 +241,7 @@ class HeuristicProcessor(object):
lp_title = default_title
else:
lp_title = simple_title
if ignorecase:
arg_ignorecase = r'(?i)'
else:
@ -250,7 +249,7 @@ class HeuristicProcessor(object):
if title_req:
lp_opt_title_open = ''
lp_opt_title_close = ''
lp_opt_title_close = ''
else:
lp_opt_title_open = opt_title_open
lp_opt_title_close = opt_title_close
@ -399,7 +398,7 @@ class HeuristicProcessor(object):
if len(lines) > 1:
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40:
return True
else:
@ -460,7 +459,7 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'markup_chapter_headings', False):
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
if getattr(self.extra_opts, 'italicize_common_cases', False):
if getattr(self.extra_opts, 'italicize_common_cases', False):
html = self.markup_italicis(html)
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
@ -487,7 +486,7 @@ class HeuristicProcessor(object):
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
###### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False):
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor

View File

@ -21,7 +21,7 @@ from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux, isfreebsd, iswindows
from calibre import unicode_path
from calibre import unicode_path, as_unicode
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
@ -111,7 +111,7 @@ class HTMLFile(object):
with open(self.path, 'rb') as f:
src = f.read()
except IOError, err:
msg = 'Could not read from file: %s with error: %s'%(self.path, unicode(err))
msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err))
if level == 0:
raise IOError(msg)
raise IgnoreFile(msg, err.errno)

View File

@ -7,8 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.conversion.utils import HeuristicProcessor
class LITInput(InputFormatPlugin):

View File

@ -3,7 +3,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin):

View File

@ -199,8 +199,8 @@ class EbookIterator(object):
not hasattr(self.pathtoopf, 'manifest'):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
plumber.input_plugin)
self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
plumber.opts)
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

View File

@ -53,6 +53,7 @@ class TXTInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
self.log = log
log.debug('Reading text from file...')
txt = stream.read()
@ -106,7 +107,7 @@ class TXTInput(InputFormatPlugin):
log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
# Dehyphenate
dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
dehyphenator = Dehyphenator(options.verbose, log=self.log)
txt = dehyphenator(txt,'txt', length)
# We don't check for block because the processor assumes block.

View File

@ -51,12 +51,12 @@ class TXTOutput(OutputFormatPlugin):
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Do not remove links within the document. This is only ' \
'useful when paired with the markdown-format option because' \
'links are always removed with plain text output.')),
' links are always removed with plain text output.')),
OptionRecommendation(name='keep_image_references',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Do not remove image references within the document. This is only ' \
'useful when paired with the markdown-format option because' \
'image references are always removed with plain text output.')),
' image references are always removed with plain text output.')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):

View File

@ -94,7 +94,7 @@ class BulkConfig(Config):
if not c: break
self.stack.removeWidget(c)
widgets = [lf, hw, sr, ps, sd, toc]
widgets = [lf, hw, ps, sd, toc, sr]
if output_widget is not None:
widgets.append(output_widget)
for w in widgets:

View File

@ -11,9 +11,10 @@ from calibre.gui2.convert import Widget
class HeuristicsWidget(Widget, Ui_Form):
TITLE = _('Heuristic Processing')
TITLE = _('Heuristic\nProcessing')
HELP = _('Modify the document text and structure using common patterns.')
COMMIT_NAME = 'heuristics'
ICON = I('heuristics.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
@ -25,44 +26,29 @@ class HeuristicsWidget(Widget, Ui_Form):
)
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)
self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
self.enable_heuristics(self.opt_enable_heuristics.checkState())
def break_cycles(self):
Widget.break_cycles(self)
try:
self.opt_enable_heuristics.stateChanged.disconnect()
self.opt_unwrap_lines.stateChanged.disconnect()
except:
pass
def set_value_handler(self, g, val):
if val is None and g is self.opt_html_unwrap_factor:
g.setValue(0.0)
return True
def enable_heuristics(self, state):
if state == Qt.Checked:
state = True
else:
state = False
self.opt_markup_chapter_headings.setEnabled(state)
self.opt_italicize_common_cases.setEnabled(state)
self.opt_fix_indents.setEnabled(state)
self.opt_delete_blank_paragraphs.setEnabled(state)
self.opt_format_scene_breaks.setEnabled(state)
self.opt_dehyphenate.setEnabled(state)
self.opt_renumber_headings.setEnabled(state)
self.opt_unwrap_lines.setEnabled(state)
if state and self.opt_unwrap_lines.checkState() == Qt.Checked:
self.opt_html_unwrap_factor.setEnabled(True)
else:
self.opt_html_unwrap_factor.setEnabled(False)
state = state == Qt.Checked
self.heuristic_options.setEnabled(state)
def enable_unwrap(self, state):
if state == Qt.Checked:

View File

@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>938</width>
<width>724</width>
<height>470</height>
</rect>
</property>
@ -15,114 +15,160 @@
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QCheckBox" name="opt_enable_heuristics">
<widget class="QLabel" name="label">
<property name="text">
<string>&amp;Preprocess input file to possibly improve structure detection</string>
<string>&lt;b&gt;Heuristic processing&lt;/b&gt; means that calibre will scan your book for common patterns and fix them. As the name implies, this involves guesswork, which means that it could end up worsening the result of a conversion, if calibre guesses wrong. Therefore, it is disabled by default. Often, if a conversion does not turn out as you expect, turning on heuristics can improve matters.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Fixed</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>15</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QCheckBox" name="opt_enable_heuristics">
<property name="text">
<string>Enable &amp;heuristic processing</string>
</property>
</widget>
</item>
<item>
<widget class="QGroupBox" name="heuristic_options">
<property name="title">
<string>Heuristic Processing</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0" colspan="2">
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QCheckBox" name="opt_unwrap_lines">
<property name="text">
<string>Unwrap lines</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLabel" name="huf_label">
<property name="text">
<string>Line &amp;un-wrap factor during preprocess:</string>
</property>
<property name="buddy">
<cstring>opt_html_unwrap_factor</cstring>
</property>
</widget>
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeType">
<enum>QSizePolicy::Fixed</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QLabel" name="huf_label">
<property name="text">
<string>Line &amp;un-wrap factor :</string>
</property>
<property name="buddy">
<cstring>opt_html_unwrap_factor</cstring>
</property>
</widget>
</item>
<item>
<widget class="QDoubleSpinBox" name="opt_html_unwrap_factor">
<property name="toolTip">
<string/>
</property>
<property name="maximum">
<double>1.000000000000000</double>
</property>
<property name="singleStep">
<double>0.050000000000000</double>
</property>
<property name="value">
<double>0.400000000000000</double>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
<item row="1" column="2">
<widget class="QDoubleSpinBox" name="opt_html_unwrap_factor">
<property name="toolTip">
<string/>
</property>
<property name="maximum">
<double>1.000000000000000</double>
</property>
<property name="singleStep">
<double>0.050000000000000</double>
</property>
<property name="value">
<double>0.400000000000000</double>
</property>
</widget>
</item>
<item row="1" column="3">
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0" colspan="4">
<item>
<widget class="QCheckBox" name="opt_markup_chapter_headings">
<property name="text">
<string>Detect and markup unformatted chapter headings and sub headings</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="4">
<item>
<widget class="QCheckBox" name="opt_renumber_headings">
<property name="text">
<string>Renumber sequences of &lt;h1&gt; or &lt;h2&gt; tags to prevent splitting</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<item>
<widget class="QCheckBox" name="opt_delete_blank_paragraphs">
<property name="text">
<string>Delete blank lines between paragraphs</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="3">
<item>
<widget class="QCheckBox" name="opt_format_scene_breaks">
<property name="text">
<string>Ensure scene breaks are consistently formatted</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="2">
<item>
<widget class="QCheckBox" name="opt_dehyphenate">
<property name="text">
<string>Remove unnecessary hyphens</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="2">
<item>
<widget class="QCheckBox" name="opt_italicize_common_cases">
<property name="text">
<string>Italicize common words and patterns</string>
</property>
</widget>
</item>
<item row="8" column="0" colspan="2">
<item>
<widget class="QCheckBox" name="opt_fix_indents">
<property name="text">
<string>Replace entity indents with CSS indents</string>
</property>
</widget>
</item>
<item row="9" column="0" colspan="2">
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>

View File

@ -12,9 +12,10 @@ from calibre.gui2 import error_dialog
class SearchAndReplaceWidget(Widget, Ui_Form):
TITLE = _('Search &\nReplace')
TITLE = _(u'Search\u00a0&\nReplace')
HELP = _('Modify the document text and structure using user defined patterns.')
COMMIT_NAME = 'search_and_replace'
ICON = I('search.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
@ -24,19 +25,19 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
)
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)
self.opt_sr1_search.set_msg(_('Search Regular Expression'))
self.opt_sr1_search.set_msg(_('&Search Regular Expression'))
self.opt_sr1_search.set_book_id(book_id)
self.opt_sr1_search.set_db(db)
self.opt_sr2_search.set_msg(_('Search Regular Expression'))
self.opt_sr2_search.set_msg(_('&Search Regular Expression'))
self.opt_sr2_search.set_book_id(book_id)
self.opt_sr2_search.set_db(db)
self.opt_sr3_search.set_msg(_('Search Regular Expression'))
self.opt_sr3_search.set_msg(_('&Search Regular Expression'))
self.opt_sr3_search.set_book_id(book_id)
self.opt_sr3_search.set_db(db)
def break_cycles(self):
Widget.break_cycles(self)
self.opt_sr1_search.break_cycles()
self.opt_sr2_search.break_cycles()
self.opt_sr3_search.break_cycles()
@ -49,6 +50,6 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
re.compile(pat)
except Exception, err:
error_dialog(self, _('Invalid regular expression'),
_('Invalid regular expression: %s')%err).exec_()
_('Invalid regular expression: %s')%err, show=True)
return False
return True

View File

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>198</width>
<height>350</height>
<width>468</width>
<height>451</height>
</rect>
</property>
<property name="sizePolicy">
@ -23,7 +23,7 @@
<property name="sizeConstraint">
<enum>QLayout::SetDefaultConstraint</enum>
</property>
<item row="0" column="0">
<item row="1" column="0">
<widget class="QGroupBox" name="groupBox">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
@ -32,7 +32,7 @@
</sizepolicy>
</property>
<property name="title">
<string>1.</string>
<string>First expression</string>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<property name="sizeConstraint">
@ -57,7 +57,10 @@
</sizepolicy>
</property>
<property name="text">
<string>Replacement Text</string>
<string>&amp;Replacement Text</string>
</property>
<property name="buddy">
<cstring>opt_sr1_replace</cstring>
</property>
</widget>
</item>
@ -74,7 +77,7 @@
</layout>
</widget>
</item>
<item row="1" column="0">
<item row="2" column="0">
<widget class="QGroupBox" name="groupBox_2">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
@ -83,7 +86,7 @@
</sizepolicy>
</property>
<property name="title">
<string>2.</string>
<string>Second expression</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<property name="sizeConstraint">
@ -108,7 +111,10 @@
</sizepolicy>
</property>
<property name="text">
<string>Replacement Text</string>
<string>&amp;Replacement Text</string>
</property>
<property name="buddy">
<cstring>opt_sr2_replace</cstring>
</property>
</widget>
</item>
@ -125,7 +131,7 @@
</layout>
</widget>
</item>
<item row="2" column="0">
<item row="3" column="0">
<widget class="QGroupBox" name="groupBox_3">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Preferred">
@ -134,7 +140,7 @@
</sizepolicy>
</property>
<property name="title">
<string>3.</string>
<string>Third expression</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<property name="sizeConstraint">
@ -159,7 +165,10 @@
</sizepolicy>
</property>
<property name="text">
<string>Replacement Text</string>
<string>&amp;Replacement Text</string>
</property>
<property name="buddy">
<cstring>opt_sr3_replace</cstring>
</property>
</widget>
</item>
@ -176,6 +185,19 @@
</layout>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>&lt;p&gt;Search and replace uses &lt;i&gt;regular expressions&lt;/i&gt;. See the &lt;a href=&quot;http://calibre-ebook.com/user_manual/regexp.html&quot;&gt;regular expressions tutorial&lt;/a&gt; to get started with regular expressions. Also clicking the wizard buttons below will allow you to test your regular expression against the current input document.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>

View File

@ -207,7 +207,7 @@ class Config(ResizableDialog, Ui_Dialog):
if not c: break
self.stack.removeWidget(c)
widgets = [self.mw, lf, hw, sr, ps, sd, toc]
widgets = [self.mw, lf, hw, ps, sd, toc, sr]
if input_widget is not None:
widgets.append(input_widget)
if output_widget is not None:

View File

@ -100,7 +100,7 @@
</size>
</property>
<property name="spacing">
<number>20</number>
<number>10</number>
</property>
<property name="wordWrap">
<bool>true</bool>
@ -129,8 +129,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>805</width>
<height>484</height>
<width>810</width>
<height>494</height>
</rect>
</property>
<layout class="QVBoxLayout" name="verticalLayout_3">

View File

@ -31,7 +31,7 @@ class StructureDetectionWidget(Widget, Ui_Form):
self.opt_chapter.set_msg(_('Detect chapters at (XPath expression):'))
self.opt_page_breaks_before.set_msg(_('Insert page breaks before '
'(XPath expression):'))
def break_cycles(self):
Widget.break_cycles(self)

View File

@ -23,9 +23,9 @@ class PluginWidget(Widget, Ui_Form):
['newline', 'max_line_length', 'force_max_line_length',
'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references',
'txt_output_encoding'])
self.db, self.book_id = db, book_id
self.db, self.book_id = db, book_id
for x in get_option('newline').option.choices:
self.opt_newline.addItem(x)
self.opt_newline.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)
self.opt_markdown_format.stateChanged.connect(self.enable_markdown_format)
@ -33,17 +33,14 @@ class PluginWidget(Widget, Ui_Form):
def break_cycles(self):
Widget.break_cycles(self)
try:
self.opt_markdown_format.stateChanged.disconnect()
except:
pass
def enable_markdown_format(self, state):
if state == Qt.Checked:
state = True
else:
state = False
state = state == Qt.Checked
self.opt_keep_links.setEnabled(state)
self.opt_keep_image_references.setEnabled(state)

View File

@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>434</width>
<width>430</width>
<height>74</height>
</rect>
</property>
@ -59,7 +59,7 @@
<string>...</string>
</property>
<property name="icon">
<iconset>
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/wizard.png</normaloff>:/images/wizard.png</iconset>
</property>
<property name="iconSize">

View File

@ -19,7 +19,7 @@ from calibre.devices.scanner import DeviceScanner
from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
warning_dialog, info_dialog, choose_dir
from calibre.ebooks.metadata import authors_to_string
from calibre import preferred_encoding, prints, force_unicode
from calibre import preferred_encoding, prints, force_unicode, as_unicode
from calibre.utils.filenames import ascii_filename
from calibre.devices.errors import FreeSpaceError
from calibre.devices.apple.driver import ITUNES_ASYNC
@ -68,13 +68,7 @@ class DeviceJob(BaseJob): # {{{
if self._aborted:
return
self.failed = True
try:
ex = unicode(err)
except:
try:
ex = str(err).decode(preferred_encoding, 'replace')
except:
ex = repr(err)
ex = as_unicode(err)
self._details = ex + '\n\n' + \
traceback.format_exc()
self.exception = err

View File

@ -85,6 +85,9 @@
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_use_subdirs">
<property name="toolTip">
<string>If checked, books are placed into sub directories based on their metadata on the device. If unchecked, books are all put into the top level directory.</string>
</property>
<property name="text">
<string>Use sub directories</string>
</property>

View File

@ -292,7 +292,7 @@ class CcEnumDelegate(QStyledItemDelegate): # {{{
def createEditor(self, parent, option, index):
m = index.model()
col = m.column_map[index.column()]
editor = QComboBox(parent)
editor = DelegateCB(parent)
editor.addItem('')
for v in m.custom_columns[col]['display']['enum_values']:
editor.addItem(v)
@ -353,6 +353,17 @@ class CcCommentsDelegate(QStyledItemDelegate): # {{{
model.setData(index, QVariant(editor.textbox.html), Qt.EditRole)
# }}}
class DelegateCB(QComboBox): # {{{

    '''
    Combo box used as the editor widget by the column delegates.

    It behaves like a plain QComboBox except that it accepts
    ShortcutOverride events before passing them on to the base class.
    '''

    def __init__(self, parent):
        QComboBox.__init__(self, parent)

    def event(self, e):
        '''
        Accept ``e`` when it is a ShortcutOverride event, then always
        delegate to QComboBox.event() and return its result.
        '''
        # Per the Qt documentation, accepting a ShortcutOverride asks Qt to
        # deliver the key press to this widget instead of treating it as a
        # shortcut.
        wants_key = e.type() == e.ShortcutOverride
        if wants_key:
            e.accept()
        handled = QComboBox.event(self, e)
        return handled

# }}}
class CcBoolDelegate(QStyledItemDelegate): # {{{
def __init__(self, parent):
'''
@ -361,7 +372,7 @@ class CcBoolDelegate(QStyledItemDelegate): # {{{
QStyledItemDelegate.__init__(self, parent)
def createEditor(self, parent, option, index):
editor = QComboBox(parent)
editor = DelegateCB(parent)
items = [_('Y'), _('N'), ' ']
icons = [I('ok.png'), I('list_remove.png'), I('blank.png')]
if tweaks['bool_custom_columns_are_tristate'] == 'no':

View File

@ -85,8 +85,8 @@ class CommonOptions(Base):
def load_conversion_widgets(self):
self.conversion_widgets = [LookAndFeelWidget, HeuristicsWidget,
SearchAndReplaceWidget, PageSetupWidget,
StructureDetectionWidget, TOCWidget]
PageSetupWidget,
StructureDetectionWidget, TOCWidget, SearchAndReplaceWidget,]
class InputOptions(Base):

View File

@ -453,10 +453,10 @@ class CompleteLineEdit(EnLineEdit):
def update_items_cache(self, complete_items):
self.completer.update_items_cache(complete_items)
def set_separator(self, sep):
self.separator = sep
def set_space_before_sep(self, space_before):
self.space_before_sep = space_before
@ -501,7 +501,7 @@ class EnComboBox(QComboBox):
def __init__(self, *args):
QComboBox.__init__(self, *args)
self.setLineEdit(EnLineEdit(self))
self.setAutoCompletionCaseSensitivity(Qt.CaseSensitive)
self.setAutoCompletionCaseSensitivity(Qt.CaseInsensitive)
self.setMinimumContentsLength(20)
def text(self):
@ -515,17 +515,17 @@ class EnComboBox(QComboBox):
self.setCurrentIndex(idx)
class CompleteComboBox(EnComboBox):
def __init__(self, *args):
EnComboBox.__init__(self, *args)
self.setLineEdit(CompleteLineEdit(self))
def update_items_cache(self, complete_items):
self.lineEdit().update_items_cache(complete_items)
def set_separator(self, sep):
self.lineEdit().set_separator(sep)
def set_space_before_sep(self, space_before):
self.lineEdit().set_space_before_sep(space_before)

View File

@ -18,7 +18,8 @@ from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, isoformat, now as nowf
from calibre.utils.date import format_date, isoformat, is_date_undefined, now as nowf
from calibre.utils.icu import capitalize
from calibre.utils.logging import default_log as log
from calibre.utils.zipfile import ZipFile, ZipInfo
from calibre.utils.magick.draw import thumbnail
@ -1026,17 +1027,12 @@ class EPUB_MOBI(CatalogPlugin):
self.__totalSteps += 3
# Load section list templates
templates = ['by_authors_normal_title_template',
'by_authors_series_title_template',
'by_titles_normal_title_template',
'by_titles_series_title_template',
'by_series_title_template',
'by_genres_normal_title_template',
'by_genres_series_title_template',
'by_recently_added_normal_title_template',
'by_recently_added_series_title_template',
'by_month_added_normal_title_template',
'by_month_added_series_title_template']
templates = []
with open(P('catalog/section_list_templates.py'), 'r') as f:
for line in f:
t = re.match("(by_.+_template)",line)
if t:
templates.append(t.group(1))
execfile(P('catalog/section_list_templates.py'), locals())
for t in templates:
setattr(self,t,eval(t))
@ -1440,7 +1436,9 @@ class EPUB_MOBI(CatalogPlugin):
# Exit if author matches previous, but author_sort doesn't match
if author[0] == current_author[0]:
error_msg = _('''
Inconsistent Author Sort values for Author '{0}' ('{1}' <> '{2}'), unable to build catalog.\n
Inconsistent Author Sort values for Author '{0}':
'{1}' <> '{2}',
unable to build catalog.\n
Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
self.opts.log.warn('\n*** Metadata error ***')
@ -1449,17 +1447,13 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
self.error.append('Metadata error')
self.error.append(error_msg)
return False
current_author = author
self.booksByAuthor = sorted(self.booksByAuthor, key=self.booksByAuthorSorter_author_sort)
# for book in self.booksByAuthor:
# print '{0:<10} {1:<5} {2:<20} {3:<20} {4:<20} {5:<20}'.format(book['series'], book['series_index'], book['title'],
# book['author'], book['authors'],book['author_sort'])
# print
# Build the unique_authors set from existing data
authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
authors = [(record['author'], capitalize(record['author_sort'])) for record in self.booksByAuthor]
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
# authors[]: (([0]:friendly [1]:sort))
@ -1565,7 +1559,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
this_title['rating'] = record['rating'] if record['rating'] else 0
if re.match('0100-01-01',str(record['pubdate'].date())):
if is_date_undefined(record['pubdate']):
this_title['date'] = None
else:
this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple())
@ -2681,8 +2675,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
# Use series, series index if avail else just title
#aTag.insert(0,'%d. %s &middot; %s' % (book['series_index'],escape(book['title']), ' & '.join(book['authors'])))
# Reassert 'date' since this is the result of a new search
if re.match('0100-01-01',str(book['pubdate'].date())):
if is_date_undefined(book['pubdate']):
book['date'] = None
else:
book['date'] = strftime(u'%B %Y', book['pubdate'].timetuple())
@ -2756,7 +2749,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
this_book = {}
this_book['author'] = book['author']
this_book['title'] = book['title']
this_book['author_sort'] = book['author_sort'].capitalize()
this_book['author_sort'] = capitalize(book['author_sort'])
this_book['read'] = book['read']
this_book['tags'] = book['tags']
this_book['id'] = book['id']
@ -3901,14 +3894,14 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
Sort non-series books before series books
'''
if not book['series']:
key = '%s %s' % (book['author_sort'].capitalize(),
book['title_sort'].capitalize())
key = '%s %s' % (capitalize(book['author_sort']),
capitalize(book['title_sort']))
else:
index = book['series_index']
integer = int(index)
fraction = index-integer
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
key = '%s ~%s %s' % (book['author_sort'].capitalize(),
key = '%s ~%s %s' % (capitalize(book['author_sort']),
self.generateSortTitle(book['series']),
series_index)
return key
@ -3919,7 +3912,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
'''
if not book['series']:
key = '%s %s' % (self.author_to_author_sort(book['author']),
book['title_sort'].capitalize())
capitalize(book['title_sort']))
else:
index = book['series_index']
integer = int(index)
@ -4313,10 +4306,11 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
formats = ' &middot; '.join(formats)
# Date of publication
pubdate = book['date']
pubmonth, pubyear = pubdate.split()
if pubyear == '101':
pubdate = pubmonth = pubyear = ''
if book['date']:
pubdate = book['date']
pubmonth, pubyear = pubdate.split()
else:
pubdate = pubyear = pubmonth = ''
# Thumb
_soup = BeautifulSoup('<html>',selfClosingTags=['img'])
@ -4570,7 +4564,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
if self.letter_or_symbol(word[0]) != word[0]:
if word[0] > 'A' or (ord('9') < ord(word[0]) < ord('A')) :
translated.append('/')
translated.append(word.capitalize())
translated.append(capitalize(word))
else:
if re.search('[0-9]+',word[0]):

View File

@ -266,14 +266,14 @@ from bad formatting. Because these functions rely on common patterns, be aware t
option may lead to worse results, so use with care. As an example, several of these options will
remove all non-breaking-space entities.
:guilabel:`Preprocess input`
This option activates various activates |app|'s Heuristic Processing stage of the conversion pipeline.
:guilabel:`Enable heuristic processing`
This option activates |app|'s Heuristic Processing stage of the conversion pipeline.
This must be enabled in order for various sub-functions to be applied.
:guilabel:`Unwrap lines`
Enabling this option will cause |app| to attempt to detect and correct hard line breaks that exist
within a document using punctuation clues and line length. |app| will first attempt to detect whether
hard line breaks exist, if they do not appear to exist |app| will not attempt to unwrap lines. The
within a document using punctuation clues and line length. |app| will first attempt to detect whether
hard line breaks exist; if they do not appear to exist, |app| will not attempt to unwrap lines. The
line-unwrap factor can be reduced if you want to 'force' |app| to unwrap lines.
:guilabel:`Line-unwrap factor`
@ -284,21 +284,21 @@ remove all non-breaking-space entities.
:guilabel:`Detect and markup unformatted chapter headings and sub headings`
If your document does not have Chapter Markers and titles formatted differently from the rest of the text,
|app| can use this option to attempt detection them and surround them with heading tags. &lt;h2&gt; tags are used
for chapter headings; &lt;h3&gt; tags are used for any titles that are detected.
|app| can use this option to attempt to detect them and surround them with heading tags. <h2> tags are used
for chapter headings; <h3> tags are used for any titles that are detected.
This function will not create a TOC, but in many cases it will cause |app|'s default chapter detection settings
to correctly detect chapters and build a TOC. Adjust the Xpath under Structure Detection if a TOC is not automatically
to correctly detect chapters and build a TOC. Adjust the XPath under Structure Detection if a TOC is not automatically
created. If there are no other headings used in the document then setting "//h:h2" under Structure Detection would
be the easiest way to create a TOC for the document.
The inserted headings are not formatted, to apply formatting use the 'extra_css' option under
The inserted headings are not formatted. To apply formatting, use the :guilabel:`Extra CSS` option under
the Look and Feel conversion settings. For example, to center heading tags, use the following::
h2, h3 { text-align: center }
:guilabel:`Renumber sequences of &lt;h1&gt; or &lt;h2&gt; tags`
Some publishers format chapter headings using multiple &lt;h1&gt; or &lt;h2&gt; tags sequentially.
:guilabel:`Renumber sequences of <h1> or <h2> tags`
Some publishers format chapter headings using multiple <h1> or <h2> tags sequentially.
|app|'s default conversion settings will cause such titles to be split into two pieces. This option
will re-number the heading tags to prevent splitting.
@ -345,7 +345,7 @@ specifying a replacement expression.
The search works by using a python regular expression. All matched text is simply removed from
the document or replaced using the replacement pattern. You can learn more about regular expressions and
their syntax at http://docs.python.org/library/re.html.
their syntax at :ref:`regexptutorial`.
.. _structure-detection:

View File

@ -107,10 +107,10 @@ My device is not being detected by |app|?
Follow these steps to find the problem:
* Make sure that you are connecting only a single device to your computer at a time. Do not have another |app| supported device, such as an iPhone/iPad, connected at the same time.
* Make sure you are running the latest version of |app|. The latest version can always be downloaded from `http://calibre-ebook.com/download`_.
* Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website <http://calibre-ebook.com/download>`_.
* Ensure your operating system is seeing the device. That is, the device should be mounted as a disk that you can access using Windows explorer or whatever the file management program on your computer is.
* In calibre, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled.
* If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `http://bugs.calibre-ebook.com`_.
* If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker <http://bugs.calibre-ebook.com>`_.
How does |app| manage collections on my SONY reader?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -21,7 +21,7 @@ This is, inevitably, going to be somewhat technical- after all, regular expressi
Where in |app| can you use regular expressions?
---------------------------------------------------
There are a few places |app| uses regular expressions. There's the header/footer removal in conversion options, metadata detection from filenames in the import settings and, since last version, there's the option to use regular expressions to search and replace in metadata of multiple books.
There are a few places |app| uses regular expressions. There's the Search & Replace in conversion options, metadata detection from filenames in the import settings and Search & Replace when editing the metadata of books in bulk.
What on earth *is* a regular expression?
------------------------------------------------
@ -94,7 +94,7 @@ I think I'm beginning to understand these regular expressions now... how do I us
Conversions
^^^^^^^^^^^^^^
Let's begin with the conversion settings, which is really neat. In the structure detection part, you can input a regexp (short for regular expression) that describes the header or footer string that will be removed during the conversion. The neat part is the wizard. Click on the wizard staff and you get a preview of what |app| "sees" during the conversion process. Scroll down to the header or footer you want to remove, select and copy it, paste it into the regexp field on top of the window. If there are variable parts, like page numbers or so, use sets and quantifiers to cover those, and while you're at it, remember to escape special characters, if there are some. Hit the button labeled :guilabel:`Test` and |app| highlights the parts it would remove were you to use the regexp. Once you're satisfied, hit OK and convert. Be careful if your conversion source has tags like this example::
Let's begin with the conversion settings, which are really neat. In the Search and Replace part, you can input a regexp (short for regular expression) that describes the string that will be replaced during the conversion. The neat part is the wizard. Click on the wizard staff and you get a preview of what |app| "sees" during the conversion process. Scroll down to the string you want to replace, select and copy it, paste it into the regexp field on top of the window. If there are variable parts, like page numbers or so, use sets and quantifiers to cover those, and while you're at it, remember to escape special characters, if there are some. Hit the button labeled :guilabel:`Test` and |app| highlights the parts it would replace were you to use the regexp. Once you're satisfied, hit OK and convert. Be careful if your conversion source has tags like this example::
Maybe, but the cops feel like you do, Anita. What's one more dead vampire?
New laws don't change that. </p>
@ -104,7 +104,7 @@ Let's begin with the conversion settings, which is really neat. In the structure
<p class="calibre4"> It had only been two years since Addison v. Clark.
The court case gave us a revised version of what life was
(shamelessly ripped out of `this thread <http://www.mobileread.com/forums/showthread.php?t=75594">`_). You'd have to remove some of the tags as well. In this example, I'd recommend beginning with the tag ``<b class="calibre2">``, now you have to end with the corresponding closing tag (opening tags are ``<tag>``, closing tags are ``</tag>``), which is simply the next ``</b>`` in this case. (Refer to a good HTML manual or ask in the forum if you are unclear on this point.) The opening tag can be described using ``<b.*?>``, the closing tag using ``</b>``, thus we could remove everything between those tags using ``<b.*?>.*?</b>``. But using this expression would be a bad idea, because it removes everything enclosed by <b>- tags (which, by the way, render the enclosed text in bold print), and it's a fair bet that we'll remove portions of the book in this way. Instead, include the beginning of the enclosed string as well, making the regular expression ``<b.*?>\s*Generated\s+by\s+ABC\s+Amber\s+LIT.*?</b>`` The ``\s`` with quantifiers are included here instead of explicitly using the spaces as seen in the string to catch any variations of the string that might occur. Remember to check what |app| will remove to make sure you don't remove any portions you want to keep if you test a new expression. If you only check one occurrence, you might miss a mismatch somewhere else in the text. Also note that should you accidentally remove more or fewer tags than you actually wanted to, |app| tries to repair the damaged code after doing the header/footer removal.
(shamelessly ripped out of `this thread <http://www.mobileread.com/forums/showthread.php?t=75594>`_). You'd have to remove some of the tags as well. In this example, I'd recommend beginning with the tag ``<b class="calibre2">``; now you have to end with the corresponding closing tag (opening tags are ``<tag>``, closing tags are ``</tag>``), which is simply the next ``</b>`` in this case. (Refer to a good HTML manual or ask in the forum if you are unclear on this point.) The opening tag can be described using ``<b.*?>``, the closing tag using ``</b>``, thus we could remove everything between those tags using ``<b.*?>.*?</b>``. But using this expression would be a bad idea, because it removes everything enclosed by ``<b>`` tags (which, by the way, render the enclosed text in bold print), and it's a fair bet that we'll remove portions of the book in this way. Instead, include the beginning of the enclosed string as well, making the regular expression ``<b.*?>\s*Generated\s+by\s+ABC\s+Amber\s+LIT.*?</b>``. The ``\s`` with quantifiers are included here instead of explicitly using the spaces as seen in the string to catch any variations of the string that might occur. Remember to check what |app| will remove to make sure you don't remove any portions you want to keep if you test a new expression. If you only check one occurrence, you might miss a mismatch somewhere else in the text. Also note that should you accidentally remove more or fewer tags than you actually wanted to, |app| tries to repair the damaged code after doing the removal.
Adding books
^^^^^^^^^^^^^^^^

View File

@ -46,6 +46,17 @@ local_tz = _local_tz = SafeLocalTimeZone()
# Timezone-aware sentinel datetime. is_date_undefined() reports a value as
# undefined when it falls on this calendar day or in any earlier year.
UNDEFINED_DATE = datetime(101,1,1, tzinfo=utc_tz)
def is_date_undefined(qt_or_dt):
    '''
    Return True if ``qt_or_dt`` does not hold a usable date.

    :param qt_or_dt: ``None``, a Python date/datetime-like object exposing
        ``year``/``month``/``day`` attributes, or a Qt date object (detected
        by the presence of a ``toString`` method).
    :return: True when the value is ``None``, is an invalid Qt date, lies in
        a year before ``UNDEFINED_DATE``, or falls on the same calendar day
        as ``UNDEFINED_DATE``; False otherwise.
    '''
    d = qt_or_dt
    if d is None:
        return True
    if hasattr(d, 'toString'):
        # Qt object. A QDateTime carries its calendar fields on the QDate
        # returned by date(); a plain QDate has no date() method.
        if hasattr(d, 'date'):
            d = d.date()
        try:
            d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz)
        except ValueError:
            # An invalid/null QDate reports 0 for its fields, which datetime
            # rejects; such a value carries no date, so it is undefined.
            return True
    return d.year < UNDEFINED_DATE.year or (
            d.year == UNDEFINED_DATE.year and
            d.month == UNDEFINED_DATE.month and
            d.day == UNDEFINED_DATE.day)
def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
'''
Parse a date/time string into a timezone aware datetime object. The timezone

View File

@ -554,6 +554,8 @@ class SMTP:
def encode_cram_md5(challenge, user, password):
    '''
    Build the base64-encoded CRAM-MD5 reply to a server challenge.

    The base64 ``challenge`` is decoded, an HMAC keyed with ``password`` is
    computed over it, and ``"<user> <hexdigest>"`` is returned base64-encoded
    with no end-of-line characters.
    '''
    decoded_challenge = base64.decodestring(challenge)
    # Added by Kovid, see http://bugs.python.org/issue5285
    # (a unicode password is converted to UTF-8 bytes before keying the HMAC)
    key = password.encode('utf-8') if isinstance(password, unicode) else password
    digest = hmac.HMAC(key, decoded_challenge).hexdigest()
    return encode_base64(user + " " + digest, eol="")