commit 9c3d85d4a5: merge from trunk
@@ -9,6 +9,8 @@ __docformat__ = 'restructuredtext en'
 
 
 import datetime
+from calibre.web.feeds.news import BasicNewsRecipe
+
 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
     now = datetime.datetime.now()
     title = 'The AJC'
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 
 
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
+
 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
     title = 'Nealz Nuze'
     language = 'en'
@@ -1,5 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import re
+import re
 
 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
     title = 'Popular Science'
@@ -1,6 +1,5 @@
-#!/usr/bin/env python
 __license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 telegraph.co.uk
 '''
@@ -8,14 +7,16 @@ telegraph.co.uk
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class TelegraphUK(BasicNewsRecipe):
-    title = u'Telegraph.co.uk'
+    title = 'Telegraph.co.uk'
     __author__ = 'Darko Miletic and Sujata Raman'
     description = 'News from United Kingdom'
-    oldest_article = 7
+    oldest_article = 2
+    category = 'news, politics, UK'
+    publisher = 'Telegraph Media Group ltd.'
     max_articles_per_feed = 100
     no_stylesheets = True
-    language = 'en'
+    language = 'en_GB'
+    remove_empty_feeds = True
     use_embedded_content = False
 
     extra_css = '''
@@ -27,13 +28,20 @@ class TelegraphUK(BasicNewsRecipe):
                 .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
                 '''
 
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
     keep_only_tags = [
-                       dict(name='div', attrs={'class':'storyHead'})
-                      ,dict(name='div', attrs={'class':'story' })
-                      #,dict(name='div', attrs={'class':['slideshowHD gutterUnder',"twoThirds gutter","caption" ] })
+                       dict(name='div', attrs={'class':['storyHead','byline']})
+                      ,dict(name='div', attrs={'id':'mainBodyArea' })
                      ]
-    remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide']})
-                  #,dict(name='div', attrs={'class':['toolshideoneQuarter']})
+    remove_tags = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide','related_links_video']})
+                  ,dict(name='ul' , attrs={'class':['shareThis shareBottom']})
                   ,dict(name='span', attrs={'class':['num','placeComment']})
                   ]
 
@@ -51,24 +59,7 @@ class TelegraphUK(BasicNewsRecipe):
              ]
 
     def get_article_url(self, article):
-        url = article.get('guid', None)
+        url = article.get('link', None)
 
         if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
             url = None
 
         return url
-
-    def postprocess_html(self,soup,first):
-
-        for bylineTag in soup.findAll(name='div', attrs={'class':'byline'}):
-            for pTag in bylineTag.findAll(name='p'):
-                if getattr(pTag.contents[0],"Comments",True):
-                    pTag.extract()
-        return soup
-
-
resources/recipes/twtfb.recipe (new file, 40 lines)
@@ -0,0 +1,40 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.thewaythefutureblogs.com
+Frederik Pohl's Blog
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheWayTheFutureBlogs(BasicNewsRecipe):
+    title = 'The Way the Future Blogs'
+    __author__ = 'Darko Miletic'
+    description = "Frederik Pohl's blog"
+    publisher = 'Frederik Pohl'
+    category = 'news, SF, books'
+    oldest_article = 30
+    max_articles_per_feed = 200
+    no_stylesheets = True
+    encoding = 'utf8'
+    use_embedded_content = False
+    language = 'en'
+    remove_empty_feeds = True
+    extra_css = ' body{font-family: Georgia,serif } '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags =[dict(name=['meta','object','embed','iframe','base','link'])]
+    keep_only_tags=[dict(attrs={'class':['post','commentlist']})]
+    remove_attributes=['width','height','lang','border']
+
+    feeds = [(u'Posts', u'http://www.thewaythefutureblogs.com/feed/')]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
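
Note: the new recipe is self-contained; outside the GUI it can be exercised with calibre's ebook-convert tool, which accepts a .recipe file directly. A typical invocation from the recipe-development workflow of the era (the output file name here is illustrative):

    ebook-convert twtfb.recipe twtfb.epub --test -vv

--test fetches only a couple of articles per feed, so the preprocess_html hook above (which strips inline style attributes) can be checked quickly.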
@@ -446,7 +446,7 @@ from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
         BOOQ, ELONEX, POCKETBOOK301, MENTOR
 from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
-from calibre.devices.jetbook.driver import JETBOOK, MIBUK
+from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
 from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
 from calibre.devices.nook.driver import NOOK
 from calibre.devices.prs505.driver import PRS505
@@ -468,14 +468,14 @@ from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
         LibraryThing
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
-        LibraryThingCovers
+        LibraryThingCovers, DoubanCovers
 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 
 plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
         LibraryThing, DoubanBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
-        Epubcheck, OpenLibraryCovers, LibraryThingCovers]
+        Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers]
 plugins += [
     ComicInput,
     EPUBInput,
@@ -520,6 +520,7 @@ plugins += [
     IREXDR1000,
     IREXDR800,
     JETBOOK,
+    JETBOOK_MINI,
     MIBUK,
     SHINEBOOK,
     POCKETBOOK360,
@@ -1,3 +1,4 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
@@ -251,6 +252,9 @@ class OutputProfile(Plugin):
     #: The character used to represent a star in ratings
     ratings_char = u'*'
 
+    #: Unsupported unicode characters to be replaced during preprocessing
+    unsupported_unicode_chars = []
+
     @classmethod
     def tags_to_string(cls, tags):
         return escape(', '.join(tags))
@@ -422,6 +426,8 @@ class SonyReaderOutput(OutputProfile):
     dpi = 168.451
     fbase = 12
     fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
+    unsupported_unicode_chars = [u'\u201f', u'\u201b']
+
 
 class KoboReaderOutput(OutputProfile):
 
@@ -120,7 +120,7 @@ def enable_plugin(plugin_or_name):
     config['enabled_plugins'] = ep
 
 default_disabled_plugins = set([
-        'Douban Books',
+        'Douban Books', 'Douban.com covers',
])
 
 def is_disabled(plugin):
@@ -56,6 +56,7 @@ def get_connected_device():
     return dev
 
 def debug(ioreg_to_tmp=False, buf=None):
+    import textwrap
     from calibre.customize.ui import device_plugins
     from calibre.devices.scanner import DeviceScanner, win_pnp_drives
     from calibre.constants import iswindows, isosx, __version__
@@ -95,13 +96,19 @@ def debug(ioreg_to_tmp=False, buf=None):
             ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
             ioreg += Device.run_ioreg()
         connected_devices = []
-        for dev in sorted(device_plugins(), cmp=lambda
-                x,y:cmp(x.__class__.__name__, y.__class__.__name__)):
-            out('Looking for', dev.__class__.__name__)
+        devplugins = list(sorted(device_plugins(), cmp=lambda
+                x,y:cmp(x.__class__.__name__, y.__class__.__name__)))
+        out('Available plugins:', textwrap.fill(' '.join([x.__class__.__name__ for x in
+            devplugins])))
+        out(' ')
+        out('Looking for devices...')
+        for dev in devplugins:
             connected, det = s.is_device_connected(dev, debug=True)
             if connected:
+                out('\t\tDetected possible device', dev.__class__.__name__)
                 connected_devices.append((dev, det))
 
+        out(' ')
         errors = {}
         success = False
         out('Devices possibly connected:', end=' ')
@@ -99,4 +99,30 @@ class MIBUK(USBMS):
     VENDOR_NAME = 'LINUX'
     WINDOWS_MAIN_MEM = 'WOLDERMIBUK'
 
+class JETBOOK_MINI(USBMS):
+    '''
+    ['0x4b8',
+     '0x507',
+     '0x100',
+     'ECTACO',
+     'ECTACO ATA/ATAPI Bridge (Bulk-Only)',
+     'Rev.0.20']
+    '''
+    FORMATS = ['fb2', 'txt']
+
+    gui_name = 'JetBook Mini'
+    name = 'JetBook Mini Device Interface'
+    description = _('Communicate with the JetBook Mini reader.')
+    author = 'Kovid Goyal'
+
+    VENDOR_ID = [0x4b8]
+    PRODUCT_ID = [0x507]
+    BCD = [0x100]
+    VENDOR_NAME = 'ECTACO'
+    WINDOWS_MAIN_MEM = '' # Matches PROD_
+    MAIN_MEMORY_VOLUME_LABEL = 'Jetbook Mini'
+
+    SUPPORTS_SUB_DIRS = True
+
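
Note: the quoted list in the JETBOOK_MINI docstring is a captured USB descriptor, and the class constants beneath it are what the device scanner matches against. A minimal sketch of that matching idea, using a hypothetical helper rather than calibre's real scanner code:

    def looks_like_jetbook_mini(vendor_id, product_id, bcd):
        # Mirrors the VENDOR_ID/PRODUCT_ID/BCD constants added above.
        return vendor_id in [0x4b8] and product_id in [0x507] and bcd in [0x100]

    # The captured descriptor ['0x4b8', '0x507', '0x100', ...] matches:
    assert looks_like_jetbook_mini(0x4b8, 0x507, 0x100)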
@@ -62,25 +62,34 @@ def wrap_lines(match):
     else:
         return ital+' '
 
-def line_length(format, raw, percent):
+class DocAnalysis(object):
     '''
-    raw is the raw text to find the line length to use for wrapping.
+    Provides various text analysis functions to determine how the document is structured.
+    format is the type of document analysis will be done against.
+    raw is the raw text to determine the line length to use for wrapping.
+    Blank lines are excluded from analysis
+    '''
+
+    def __init__(self, format='html', raw=''):
+        raw = raw.replace('&nbsp;', ' ')
+        if format == 'html':
+            linere = re.compile('(?<=<p)(?![^>]*>\s*</p>).*?(?=</p>)', re.DOTALL)
+        elif format == 'pdf':
+            linere = re.compile('(?<=<br>)(?!\s*<br>).*?(?=<br>)', re.DOTALL)
+        elif format == 'spanned_html':
+            linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
+        self.lines = linere.findall(raw)
+
+    def line_length(self, percent):
+        '''
+        Analyses the document to find the median line length.
         percentage is a decimal number, 0 - 1 which is used to determine
         how far in the list of line lengths to use. The list of line lengths is
         ordered smallest to larged and does not include duplicates. 0.5 is the
         median value.
         '''
-    raw = raw.replace('&nbsp;', ' ')
-    if format == 'html':
-        linere = re.compile('(?<=<p).*?(?=</p>)', re.DOTALL)
-    elif format == 'pdf':
-        linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL)
-    elif format == 'spanned_html':
-        linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
-    lines = linere.findall(raw)
-
         lengths = []
-    for line in lines:
+        for line in self.lines:
             if len(line) > 0:
                 lengths.append(len(line))
@@ -106,6 +115,52 @@ def line_length(format, raw, percent):
 
         return lengths[index]
 
+    def line_histogram(self, percent):
+        '''
+        Creates a broad histogram of the document to determine whether it incorporates hard
+        line breaks.  Lines are sorted into 20 'buckets' based on length.
+        percent is the percentage of lines that should be in a single bucket to return true
+        The majority of the lines will exist in 1-2 buckets in typical docs with hard line breaks
+        '''
+        minLineLength=20 # Ignore lines under 20 chars (typical of spaces)
+        maxLineLength=1900 # Discard larger than this to stay in range
+        buckets=20 # Each line is divided into a bucket based on length
+
+        #print "there are "+str(len(lines))+" lines"
+        #max = 0
+        #for line in self.lines:
+        #    l = len(line)
+        #    if l > max:
+        #        max = l
+        #print "max line found is "+str(max)
+        # Build the line length histogram
+        hRaw = [ 0 for i in range(0,buckets) ]
+        for line in self.lines:
+            l = len(line)
+            if l > minLineLength and l < maxLineLength:
+                l = int(l/100)
+                #print "adding "+str(l)
+                hRaw[l]+=1
+
+        # Normalize the histogram into percents
+        totalLines = len(self.lines)
+        h = [ float(count)/totalLines for count in hRaw ]
+        #print "\nhRaw histogram lengths are: "+str(hRaw)
+        #print "            percents are: "+str(h)+"\n"
+
+        # Find the biggest bucket
+        maxValue = 0
+        for i in range(0,len(h)):
+            if h[i] > maxValue:
+                maxValue = h[i]
+
+        if maxValue < percent:
+            #print "Line lengths are too variable. Not unwrapping."
+            return False
+        else:
+            #print str(maxValue)+" of the lines were in one bucket"
+            return True
+
 class Dehyphenator(object):
     '''
     Analyzes words to determine whether hyphens should be retained/removed.  Uses the document
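
Note: the bucketing in line_histogram is easy to sanity-check in isolation. A self-contained sketch of the same idea (the names here are illustrative, not calibre's):

    def mostly_one_bucket(line_lengths, percent=0.5, bucket_size=100, buckets=20):
        # Sort lines into fixed-width buckets by length, then see whether a
        # single bucket holds at least `percent` of all lines -- the signature
        # of a document with hard line breaks at a consistent width.
        histogram = [0] * buckets
        for l in line_lengths:
            if 20 < l < 1900:
                histogram[l // bucket_size] += 1
        total = len(line_lengths)
        return total > 0 and max(histogram) / float(total) >= percent

    # OCR-style text wrapped at ~60 characters clusters in one bucket:
    assert mostly_one_bucket([58, 61, 60, 59, 62, 57, 60, 61])
    # Genuinely flowing paragraphs of varied length do not:
    assert not mostly_one_bucket([30, 250, 900, 1200, 75, 480, 640, 1500])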
@@ -117,23 +172,41 @@ class Dehyphenator(object):
 
     def __init__(self):
         # Add common suffixes to the regex below to increase the likelihood of a match -
         # don't add suffixes which are also complete words, such as 'able' or 'sex'
-        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)ion(s|al(ly)?)?|ings?|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
+        self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|ment(s)?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex)$", re.IGNORECASE)
         # remove prefixes if the prefix was not already the point of hyphenation
-        self.prefixes = re.compile(r'^(un|in|ex)$', re.IGNORECASE)
-        self.removeprefix = re.compile(r'^(un|in|ex)', re.IGNORECASE)
+        self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
+        self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
 
     def dehyphenate(self, match):
         firsthalf = match.group('firstpart')
         secondhalf = match.group('secondpart')
+        try:
+            wraptags = match.group('wraptags')
+        except:
+            wraptags = ''
         hyphenated = str(firsthalf) + "-" + str(secondhalf)
         dehyphenated = str(firsthalf) + str(secondhalf)
         lookupword = self.removesuffixes.sub('', dehyphenated)
         if self.prefixes.match(firsthalf) is None:
             lookupword = self.removeprefix.sub('', lookupword)
-        booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
         #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
-        match = booklookup.search(self.html)
-        if match:
+        try:
+            searchresult = self.html.find(str.lower(lookupword))
+        except:
+            return hyphenated
+        if self.format == 'html_cleanup':
+            if self.html.find(lookupword) != -1 or searchresult != -1:
+                #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
+                return dehyphenated
+            elif self.html.find(hyphenated) != -1:
+                #print "Cleanup:returned hyphenated word: " + str(hyphenated)
+                return hyphenated
+            else:
+                #print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)
+                return firsthalf+u'\u2014'+wraptags+secondhalf
+
+        else:
+            if self.html.find(lookupword) != -1 or searchresult != -1:
                 #print "returned dehyphenated word: " + str(dehyphenated)
                 return dehyphenated
             else:
@@ -142,17 +215,19 @@ class Dehyphenator(object):
 
     def __call__(self, html, format, length=1):
         self.html = html
+        self.format = format
         if format == 'html':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\s>]+)-\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)' % length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)-\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)' % length)
         elif format == 'pdf':
-            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^“"\s>]+)-\s*(<p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
+            intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)-\s*(?P<wraptags><p>|</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)'% length)
         elif format == 'individual_words':
-            intextmatch = re.compile('>[^<]*\b(?P<firstpart>[^"\s>]+)-(?P<secondpart)\w+)\b[^<]*<') # for later, not called anywhere yet
+            intextmatch = re.compile(u'>[^<]*\b(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)-(?P<secondpart)\w+)\b[^<]*<') # for later, not called anywhere yet
+        elif format == 'html_cleanup':
+            intextmatch = re.compile(u'(?P<firstpart>[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)-\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)')
+
         html = intextmatch.sub(self.dehyphenate, html)
         return html
 
 
 class CSSPreProcessor(object):
 
     PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
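
Note: the calling convention for the reworked Dehyphenator, exactly as used elsewhere in this commit: the instance is called with the html, a format selector ('html', 'pdf', 'individual_words', or the new 'html_cleanup'), and the median line length, and it resolves end-of-line hyphens by looking the joined word up in the rest of the same document:

    dehyphenator = Dehyphenator()
    html = dehyphenator(html, 'html', length)          # aggressive pass during unwrapping
    html = dehyphenator(html, 'html_cleanup', length)  # conservative second pass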
@@ -286,7 +361,7 @@ class HTMLPreProcessor(object):
                   (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
 
                   # Detect Chapters to match default XPATH in GUI
-                  (re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head),
+                  (re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Kapitel|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head),
                   # Cover the case where every letter in a chapter title is separated by a space
                   (re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head),
 
@@ -374,10 +449,8 @@ class HTMLPreProcessor(object):
                 print 'Failed to parse remove_footer regexp'
                 traceback.print_exc()
 
-        # unwrap em/en dashes, delete soft hyphens - moved here so it's executed after header/footer removal
+        # delete soft hyphens - moved here so it's executed after header/footer removal
         if is_pdftohtml:
-            # unwrap em/en dashes
-            end_rules.append((re.compile(u'(?<=[–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: ''))
             # unwrap/delete soft hyphens
             end_rules.append((re.compile(u'[\xad](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: ''))
             # unwrap/delete soft hyphens with formatting
@@ -391,12 +464,15 @@ class HTMLPreProcessor(object):
 
         length = -1
         if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
-            length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor'))
+            docanalysis = DocAnalysis('pdf', html)
+            length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
             if length:
                 #print "The pdf line length returned is " + str(length)
+                # unwrap em/en dashes
+                end_rules.append((re.compile(u'(?<=.{%i}[–—])\s*<p>\s*(?=[[a-z\d])' % length), lambda match: ''))
                 end_rules.append(
                     # Un wrap using punctuation
-                    (re.compile(r'(?<=.{%i}([a-z,:)\IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(u'(?<=.{%i}([a-z,:)\IA\u00DF]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
                 )
 
         for rule in self.PREPROCESS + start_rules:
@@ -454,6 +530,14 @@ class HTMLPreProcessor(object):
         if getattr(self.extra_opts, 'smarten_punctuation', False):
             html = self.smarten_punctuation(html)
 
+        unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
+        if unsupported_unicode_chars:
+            from calibre.ebooks.unidecode.unidecoder import Unidecoder
+            unidecoder = Unidecoder()
+            for char in unsupported_unicode_chars:
+                asciichar = unidecoder.decode(char)
+                html = html.replace(char, asciichar)
+
         return html
 
     def smarten_punctuation(self, html):
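
Note: this hook ties the two halves of the commit together: an output profile (here SonyReaderOutput) declares characters its device cannot render, and the preprocessor transliterates just those characters to ASCII. A sketch of the same substitution with a stand-in decoder (the real one is calibre.ebooks.unidecode.unidecoder.Unidecoder):

    # hypothetical stand-in for Unidecoder.decode()
    ascii_equivalents = {u'\u201f': '"', u'\u201b': "'"}

    def strip_unsupported(html, unsupported_unicode_chars):
        for char in unsupported_unicode_chars:
            html = html.replace(char, ascii_equivalents.get(char, '?'))
        return html

    print(strip_unsupported(u'\u201bquoted\u201f', [u'\u201f', u'\u201b']))
    # -> 'quoted"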
@@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 import re
-from calibre.ebooks.conversion.preprocess import line_length, Dehyphenator
+from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.utils.logging import default_log
 
 class PreProcessor(object):
@@ -77,13 +77,18 @@ class PreProcessor(object):
 
     def __call__(self, html):
         self.log("********* Preprocessing HTML *********")
+
+        # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
+        html = re.sub(r"\s*</p>", "</p>\n", html)
+        html = re.sub(r"\s*<p>\s*", "\n<p>", html)
+
         ###### Check Markup ######
         #
         # some lit files don't have any <p> tags or equivalent (generally just plain text between
         # <pre> tags), check and mark up line endings if required before proceeding
         if self.no_markup(html, 0.1):
             self.log("not enough paragraph markers, adding now")
-            # check if content is in pre tags, use txt procesor to mark up if so
+            # check if content is in pre tags, use txt processor to mark up if so
             pre = re.compile(r'<pre>', re.IGNORECASE)
             if len(pre.findall(html)) == 1:
                 self.log("Running Text Processing")
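
Note: the two re.sub calls moved to the top of __call__ normalise paragraph boundaries so that every </p> ends a line and every <p> starts one; the line-oriented analysis below (DocAnalysis, no_markup) depends on that layout. For example:

    import re
    html = "<p>one</p>   <p>two</p>"
    html = re.sub(r"\s*</p>", "</p>\n", html)
    html = re.sub(r"\s*<p>\s*", "\n<p>", html)
    # html is now "\n<p>one</p>\n<p>two</p>\n"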
@@ -113,47 +118,77 @@ class PreProcessor(object):
         # Get rid of empty <o:p> tags to simplify other processing
         html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
         # Get rid of empty span, bold, & italics tags
-        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
+        html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
         html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
         html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
 
-        # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing
+        # If more than 40% of the lines are empty paragraphs and the user has enabled remove
+        # paragraph spacing then delete blank lines to clean up spacing
         linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
         blankreg = re.compile(r'\s*(?P<openline><p[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
         #multi_blank = re.compile(r'(\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>){2,}', re.IGNORECASE)
         blanklines = blankreg.findall(html)
         lines = linereg.findall(html)
+        blanks_between_paragraphs = False
         if len(lines) > 1:
             self.log("There are " + str(len(blanklines)) + " blank lines. " + str(float(len(blanklines)) / float(len(lines))) + " percent blank")
             if float(len(blanklines)) / float(len(lines)) > 0.40 and getattr(self.extra_opts,
                     'remove_paragraph_spacing', False):
                 self.log("deleting blank lines")
                 html = blankreg.sub('', html)
-        # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly
-        html = re.sub(r"\s*</p>", "</p>\n", html)
-        html = re.sub(r"\s*<p>\s*", "\n<p>", html)
+            elif float(len(blanklines)) / float(len(lines)) > 0.40:
+                blanks_between_paragraphs = True
+                #print "blanks between paragraphs is marked True"
+            else:
+                blanks_between_paragraphs = False
+        #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
         # detect chapters/sections to match xpath or splitting logic
+        #
+        # Build the Regular Expressions in pieces
+        lookahead = "(?=<(p|div))"
+        chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
+        chapter_header_open = r"(?P<chap>"
+        chapter_header_close = ")\s*"
+        chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)\s[^>]*>)?\s*</(?P=outer)>\s*"
+        if blanks_between_paragraphs:
+            blank_lines = "(\s*<p[^>]*>\s*</p>){0,2}\s*"
+        else:
+            blank_lines = ""
+        opt_title_open = "("
+        title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
+        title_header_open = "(?P<title>"
+        title_header_close = ")\s*"
+        title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"
+        opt_title_close = ")?"
+
+        default_title = r"(\s*[\w\'\"-]+){1,5}(?!<)"
+        typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}"
+        numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*"
+        uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*"
+
+        chapter_marker = lookahead+chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
+        #print chapter_marker
         heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
         self.html_preprocess_sections = len(heading.findall(html))
         self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings")
         #
         # Start with most typical chapter headings, get more aggressive until one works
         if self.html_preprocess_sections < 10:
-            chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</[ibu]>){0,2}\s*(</span>)?\s*(</(p|/?br)>)\s*\s*(\s*<p[^>]*>\s*</p>){0,2}\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE|re.VERBOSE)
+            chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
             html = chapdetect.sub(self.chapter_head, html)
         if self.html_preprocess_sections < 10:
             self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters")
-            chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
+            chapter_marker = lookahead+chapter_line_open+chapter_header_open+numeric_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
+            chapdetect2 = re.compile(r'%s' % chapter_marker, re.IGNORECASE)
             html = chapdetect2.sub(self.chapter_head, html)
 
         if self.html_preprocess_sections < 10:
             self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words")
-            chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu][^>]*>){0,2}\s*.?([A-Z#\-\s]+)\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu][^>]*>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE)
+            chapter_marker = lookahead+chapter_line_open+chapter_header_open+uppercase_chapters+chapter_header_close+chapter_line_close+blank_lines+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close
+            chapdetect2 = re.compile(r'%s' % chapter_marker, re.UNICODE)
             html = chapdetect2.sub(self.chapter_head, html)
 
         ###### Unwrap lines ######
         #
-        self.log("Unwrapping Lines")
         # Some OCR sourced files have line breaks in the html using a combination of span & p tags
         # span are used for hard line breaks, p for new paragraphs.  Determine which is used so
         # that lines can be un-wrapped across page boundaries
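
Note: building chapdetect out of named pieces makes each escalation step (typical, then numeric, then uppercase chapter headings) a one-line recombination instead of a second 700-character regex. A reduced illustration of the same composition idea, with deliberately simplified patterns rather than the ones above:

    import re

    lookahead  = r"(?=<(p|div))"
    line_open  = r"<(?P<outer>p|div)[^>]*>\s*"
    line_close = r"\s*</(?P=outer)>"
    typical    = r"(?P<chap>(Chapter|Prologue|Epilogue)\s*\d*)"
    numeric    = r"(?P<chap>\d+\.?)"

    chapdetect = re.compile(lookahead + line_open + typical + line_close, re.IGNORECASE)
    assert chapdetect.search("<p>Chapter 12</p>")
    # escalate: swap only the header piece, reuse everything else
    chapdetect2 = re.compile(lookahead + line_open + numeric + line_close, re.IGNORECASE)
    assert chapdetect2.search("<p>12.</p>")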
@@ -168,25 +203,40 @@ class PreProcessor(object):
                 format = 'html'
         else:
             format = 'html'
+        # Check Line histogram to determine if the document uses hard line breaks, If 50% or
+        # more of the lines break in the same region of the document then unwrapping is required
+        docanalysis = DocAnalysis(format, html)
+        hardbreaks = docanalysis.line_histogram(.50)
+        self.log("Hard line breaks check returned "+str(hardbreaks))
         # Calculate Length
-        length = line_length(format, html, getattr(self.extra_opts,
-            'html_unwrap_factor', 0.4))
+        unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
+        length = docanalysis.line_length(unwrap_factor)
         self.log("*** Median line length is " + str(length) + ", calculated with " + format + " format ***")
-        max_length = length * 1.4
-        min_max = str("(?<=.{"+str(length)+"})(?<!.{"+str(max_length)+"})")
-        #
-        # Unwrap em/en dashes, delete soft-hyphens
-        #self.log("\n\n\n\n\n\n\n\n\n\n\n"+html+"\n\n\n\n\n\n\n\n\n\n\n\n\n")
-        html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
-        html = re.sub(u'%s(?<=[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % min_max, '', html)
+        # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
+        if hardbreaks or unwrap_factor < 0.4:
+            self.log("Unwrapping required, unwrapping Lines")
+            # Unwrap em/en dashes
+            html = re.sub(u'(?<=.{%i}[\u2013\u2014])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])' % length, '', html)
             # Dehyphenate
+            self.log("Unwrapping/Removing hyphens")
             dehyphenator = Dehyphenator()
             html = dehyphenator(html,'html', length)
+            self.log("Done dehyphenating")
             # Unwrap lines using punctation and line length
-            unwrap = re.compile(r"(?<=.{%i}([a-z,;):\IA]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
+            unwrap = re.compile(u"(?<=.{%i}([a-z,:)\IA\u00DF]|(?<!\&\w{4});))\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE)
             html = unwrap.sub(' ', html)
+            #check any remaining hyphens, but only unwrap if there is a match
+            dehyphenator = Dehyphenator()
+            html = dehyphenator(html,'html_cleanup', length)
+        else:
+            # dehyphenate in cleanup mode to fix anything previous conversions/editing missed
+            self.log("Cleaning up hyphenation")
+            dehyphenator = Dehyphenator()
+            html = dehyphenator(html,'html_cleanup', length)
+            self.log("Done dehyphenating")
+
+        # delete soft hyphens
+        html = re.sub(u'\xad\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html)
+
         # If still no sections after unwrapping mark split points on lines with no punctuation
         if self.html_preprocess_sections < 10:
@@ -9,6 +9,7 @@ import traceback, socket, re, sys
 from functools import partial
 from threading import Thread, Event
 from Queue import Queue, Empty
+from lxml import etree
 
 import mechanize
 
@@ -216,6 +217,68 @@ def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{
 
 # }}}
 
+class DoubanCovers(CoverDownload): # {{{
+    'Download covers from Douban.com'
+
+    DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
+    CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
+    name = 'Douban.com covers'
+    description = _('Download covers from Douban.com')
+    author = 'Li Fanxi'
+
+    def get_cover_url(self, isbn, br, timeout=5.):
+        try:
+            url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY
+            src = br.open(url, timeout=timeout).read()
+        except Exception, err:
+            if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
+                err = Exception(_('Douban.com API timed out. Try again later.'))
+            raise err
+        else:
+            feed = etree.fromstring(src)
+            NAMESPACES = {
+                'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
+                'atom' : 'http://www.w3.org/2005/Atom',
+                'db': 'http://www.douban.com/xmlns/'
+            }
+            XPath = partial(etree.XPath, namespaces=NAMESPACES)
+            entries = XPath('//atom:entry')(feed)
+            if len(entries) < 1:
+                return None
+            try:
+                cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
+                u = cover_url(entries[0])[0].replace('/spic/', '/lpic/');
+                # If URL contains "book-default", the book doesn't have a cover
+                if u.find('book-default') != -1:
+                    return None
+            except:
+                return None
+            return u
+
+    def has_cover(self, mi, ans, timeout=5.):
+        if not mi.isbn:
+            return False
+        br = browser()
+        try:
+            if self.get_cover_url(mi.isbn, br, timeout=timeout) != None:
+                self.debug('cover for', mi.isbn, 'found')
+                ans.set()
+        except Exception, e:
+            self.debug(e)
+
+    def get_covers(self, mi, result_queue, abort, timeout=5.):
+        if not mi.isbn:
+            return
+        br = browser()
+        try:
+            url = self.get_cover_url(mi.isbn, br, timeout=timeout)
+            cover_data = br.open_novisit(url).read()
+            result_queue.put((True, cover_data, 'jpg', self.name))
+        except Exception, e:
+            result_queue.put((False, self.exception_to_string(e),
+                traceback.format_exc(), self.name))
+# }}}
+
 def download_cover(mi, timeout=5.): # {{{
     results = Queue()
     download_covers(mi, results, max_covers=1, timeout=timeout)
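
Note: DoubanCovers resolves an ISBN to a cover in two steps: fetch the Atom entry for the ISBN from the Douban API, then pull the image link out of it and upgrade the thumbnail path (/spic/) to the large one (/lpic/). The request it issues has this shape (the ISBN below is purely illustrative; the API key is the one in the diff):

    isbn = '9787536692930'
    url = ('http://api.douban.com/book/subject/isbn/' + isbn
           + '?apikey=' + '0bd1672394eb1ebf2374356abec15c3d')
    # br.open(url, timeout=timeout).read(), then etree.fromstring(...),
    # then XPath "descendant::atom:link[@rel='image']/attribute::href"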
@@ -181,7 +181,7 @@ def metadata_from_filename(name, pat=None):
                 mi.isbn = si
         except (IndexError, ValueError):
             pass
-    if not mi.title:
+    if mi.is_null('title'):
         mi.title = name
     return mi
 
@@ -184,7 +184,7 @@ class EditMetadataAction(InterfaceAction):
         self.gui.tags_view.blockSignals(True)
         try:
             changed = MetadataBulkDialog(self.gui, rows,
-                self.gui.library_view.model().db).changed
+                self.gui.library_view.model()).changed
         finally:
             self.gui.tags_view.blockSignals(False)
         if changed:
@@ -142,12 +142,13 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
                     _('Append to field'),
                    ]
 
-    def __init__(self, window, rows, db):
+    def __init__(self, window, rows, model):
         QDialog.__init__(self, window)
         Ui_MetadataBulkDialog.__init__(self)
         self.setupUi(self)
-        self.db = db
-        self.ids = [db.id(r) for r in rows]
+        self.model = model
+        self.db = model.db
+        self.ids = [self.db.id(r) for r in rows]
         self.box_title.setText('<p>' +
                 _('Editing meta information for <b>%d books</b>') %
                 len(rows))
|
|||||||
self.tag_editor_button.clicked.connect(self.tag_editor)
|
self.tag_editor_button.clicked.connect(self.tag_editor)
|
||||||
self.autonumber_series.stateChanged[int].connect(self.auto_number_changed)
|
self.autonumber_series.stateChanged[int].connect(self.auto_number_changed)
|
||||||
|
|
||||||
if len(db.custom_field_keys(include_composites=False)) == 0:
|
if len(self.db.custom_field_keys(include_composites=False)) == 0:
|
||||||
self.central_widget.removeTab(1)
|
self.central_widget.removeTab(1)
|
||||||
else:
|
else:
|
||||||
self.create_custom_column_editors()
|
self.create_custom_column_editors()
|
||||||
@ -617,8 +618,15 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
|
|||||||
self.worker = Worker(args, self.db, self.ids,
|
self.worker = Worker(args, self.db, self.ids,
|
||||||
getattr(self, 'custom_column_widgets', []),
|
getattr(self, 'custom_column_widgets', []),
|
||||||
Dispatcher(bb.accept, parent=bb))
|
Dispatcher(bb.accept, parent=bb))
|
||||||
|
|
||||||
|
# The metadata backup thread causes database commits
|
||||||
|
# which can slow down bulk editing of large numbers of books
|
||||||
|
self.model.stop_metadata_backup()
|
||||||
|
try:
|
||||||
self.worker.start()
|
self.worker.start()
|
||||||
bb.exec_()
|
bb.exec_()
|
||||||
|
finally:
|
||||||
|
self.model.start_metadata_backup()
|
||||||
|
|
||||||
if self.worker.error is not None:
|
if self.worker.error is not None:
|
||||||
return error_dialog(self, _('Failed'),
|
return error_dialog(self, _('Failed'),
|
||||||
|
@@ -57,6 +57,10 @@ class SchedulerDialog(QDialog, Ui_Dialog):
 
         self.old_news.setValue(gconf['oldest_news'])
 
+    def keyPressEvent(self, ev):
+        if ev.key() not in (Qt.Key_Enter, Qt.Key_Return):
+            return QDialog.keyPressEvent(self, ev)
+
     def break_cycles(self):
         self.disconnect(self.recipe_model, SIGNAL('searched(PyQt_PyObject)'),
                 self.search_done)
@@ -159,17 +159,24 @@ class BooksModel(QAbstractTableModel): # {{{
         # do something on the GUI thread. Deadlock.
         self.cover_cache = CoverCache(db, FunctionDispatcher(self.db.cover))
         self.cover_cache.start()
-        if self.metadata_backup is not None:
-            self.metadata_backup.stop()
-            # Would like to to a join here, but the thread might be waiting to
-            # do something on the GUI thread. Deadlock.
-        self.metadata_backup = MetadataBackup(db)
-        self.metadata_backup.start()
+        self.stop_metadata_backup()
+        self.start_metadata_backup()
         def refresh_cover(event, ids):
             if event == 'cover' and self.cover_cache is not None:
                 self.cover_cache.refresh(ids)
         db.add_listener(refresh_cover)
 
+    def start_metadata_backup(self):
+        self.metadata_backup = MetadataBackup(self.db)
+        self.metadata_backup.start()
+
+    def stop_metadata_backup(self):
+        if getattr(self, 'metadata_backup', None) is not None:
+            self.metadata_backup.stop()
+            # Would like to to a join here, but the thread might be waiting to
+            # do something on the GUI thread. Deadlock.
+
     def refresh_ids(self, ids, current_row=-1):
         rows = self.db.refresh_ids(ids)
         if rows:
@@ -106,14 +106,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         d.exec_()
 
     def compact(self, *args):
-        from calibre.library.caches import MetadataBackup
         m = self.gui.library_view.model()
-        if m.metadata_backup is not None:
-            m.metadata_backup.stop()
+        m.stop_metadata_backup()
+        try:
             d = CheckIntegrity(m.db, self)
             d.exec_()
-        m.metadata_backup = MetadataBackup(m.db)
-        m.metadata_backup.start()
+        finally:
+            m.start_metadata_backup()
 
     def open_config_dir(self, *args):
         from calibre.utils.config import config_dir
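
Note: both call sites introduced in this commit (the bulk metadata edit above and this integrity check) follow the same pattern around a long-running database operation, with the restart in a finally block so the backup thread comes back even on error. The shape of the pattern, with a placeholder for the actual work:

    model = self.gui.library_view.model()
    model.stop_metadata_backup()
    try:
        run_long_database_operation()   # placeholder for the worker / dialog
    finally:
        model.start_metadata_backup()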
@@ -217,9 +217,12 @@ def fetch_scheduled_recipe(arg):
     if 'output_profile' in ps:
         recs.append(('output_profile', ps['output_profile'],
             OptionRecommendation.HIGH))
-        if ps['output_profile'] == 'kindle':
-            recs.append(('no_inline_toc', True,
-                OptionRecommendation.HIGH))
+        # Disabled since apparently some people use
+        # K4PC and, surprise, surprise, it doesn't support
+        # indexed MOBIs.
+        #if ps['output_profile'] == 'kindle':
+        #    recs.append(('no_inline_toc', True,
+        #        OptionRecommendation.HIGH))
 
     lf = load_defaults('look_and_feel')
     if lf.get('base_font_size', 0.0) != 0.0:
@@ -73,6 +73,14 @@ class JetBook(Device):
     manufacturer = 'Ectaco'
     id = 'jetbook'
 
+class JetBookMini(Device):
+
+    output_profile = 'jetbook5'
+    output_format = 'FB2'
+    name = 'JetBook Mini'
+    manufacturer = 'Ectaco'
+    id = 'jetbookmini'
+
 class KindleDX(Kindle):
 
     output_profile = 'kindle_dx'
@@ -584,12 +592,42 @@ class LibraryPage(QWizardPage, LibraryUI):
             qt_app.load_translations()
             self.emit(SIGNAL('retranslate()'))
             self.init_languages()
+            try:
+                if prefs['language'].lower().startswith('zh'):
+                    from calibre.customize.ui import enable_plugin
+                    for name in ('Douban Books', 'Douban.com covers'):
+                        enable_plugin(name)
+            except:
+                pass
+
+    def is_library_dir_suitable(self, x):
+        return LibraryDatabase2.exists_at(x) or not os.listdir(x)
+
+    def validatePage(self):
+        newloc = unicode(self.location.text())
+        if not self.is_library_dir_suitable(newloc):
+            self.show_library_dir_error(newloc)
+            return False
+        return True
 
     def change(self):
-        dir = choose_dir(self, 'database location dialog',
+        x = choose_dir(self, 'database location dialog',
                 _('Select location for books'))
-        if dir:
-            self.location.setText(dir)
+        if x:
+            if self.is_library_dir_suitable(x):
+                self.location.setText(x)
+            else:
+                self.show_library_dir_error(x)
+
+    def show_library_dir_error(self, x):
+        if not isinstance(x, unicode):
+            try:
+                x = x.decode(filesystem_encoding)
+            except:
+                x = unicode(repr(x))
+        error_dialog(self, _('Bad location'),
+                _('You must choose an empty folder for '
+                    'the calibre library. %s is not empty.')%x, show=True)
 
     def initializePage(self):
         lp = prefs['library_path']
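
Note: the guard introduced here treats a directory as a valid library target only if it already is a calibre library or is empty. The rule itself is one line and easy to check in isolation (the temporary directory and stand-in callback below are purely for illustration):

    import os, tempfile

    def dir_is_suitable(x, exists_at=lambda p: False):
        # exists_at stands in for LibraryDatabase2.exists_at (metadata.db check)
        return exists_at(x) or not os.listdir(x)

    d = tempfile.mkdtemp()
    assert dir_is_suitable(d)           # freshly created, hence empty
    open(os.path.join(d, 'stray.txt'), 'w').close()
    assert not dir_is_suitable(d)       # non-empty and not a library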