Merge remote-tracking branch 'kovid/master'

Tomasz Długosz 2014-05-04 20:16:27 +02:00
commit 26b73c4f93
53 changed files with 948 additions and 528 deletions


@ -20,6 +20,56 @@
# new recipes:
# - title:
- version: 1.35.0
date: 2014-05-02
new features:
- title: "Edit Book: Redesign the syntax highlighter to improve performance for large documents and extended editing sessions."
tickets: [1314339]
- title: "Edit book: Make cursor movement smooth by not highlighting matching tags while the cursor is moving. Only match highlighting tags if the cursor stays still for a time."
- title: "Spellcheck dialog: Indicate whether a word is ignored in the Misspelled column"
- title: "Spellcheck dialog: Pressing Ctrl+C on the words list copies only selected words, regardless of current cell"
- title: "Add a copy to clipboard action to the context menu for the spell check dialog"
- title: "Edit book: Fix save button incorrectly disabled after a failed save"
tickets: [1313567]
bug fixes:
- title: "Edit Book: Fix an error when merging CSS stylesheets that contain @charset rules"
- title: "Edit book: Fix extra invalid entries being generated in the manifest when editing an AZW3 file that has no images."
- title: "Edit book: Fix a hang when editing an HTML or XML file with text of the form <abc: (i.e. a tag name with a trailing colon)."
tickets: [1314009]
- title: "Fix regression that prevented the ebook editor from starting on linux systems with locale set to 'C'"
tickets: [1315064]
- title: "DOCX Input: Fix formatting of the generated Index when the index is complex, i.e. with lots of references to the same item, multiple level of sub-items, etc."
- title: "Smarten punctuation: Fix a double quote preceded by a hyphen at the end of a sentence (before the start of the next tag) being converted into an opening quote instead of closing quote."
tickets: [1286477]
- title: "News download: Fix very long URLs for links to pages causing errors on windows because of max path length restrictions."
tickets: [1313982]
- title: "Edit book: Fix saved search dialog causing high CPU usage"
- title: "Edit book: Fix importing of Lithuanian dictionary from OpenOffice, that does not specify a country code."
tickets: [1313315]
improved recipes:
- Ars Technica
- Daily Mirror
- Birmingham Evening Mail
- NRC - Next
- Private Eye
- NZZ
- version: 1.34.0
date: 2014-04-25


@ -78,7 +78,7 @@ sum of the individual file sizes.
Many files have special meaning, in the book. These will typically have
an icon next to their names, indicating the special meaning. For example, in
the picture to the left, you can see that the files :guilabel:`cover_image.jpg`
and :guilabel:`titlepage.xhtml` have the ocon of a cover next to them, this
and :guilabel:`titlepage.xhtml` have the icon of a cover next to them, this
indicates they are the book cover image and titlepage. Similarly, the
:guilabel:`content.opf` file has a metadata icon next to it, indicating the
book metadata is present in it and the :guilabel:`toc.ncx` file has a T
@ -123,7 +123,9 @@ Changing text file order
You can re-arrange the order in which text (HTML) files are opened when reading
the book by simply dragging and dropping them in the Files browser. For the
technically inclined, this is called re-ordering the book spine.
technically inclined, this is called re-ordering the book spine. Note that you
have to drop the items *between* other items, not on top of them; this can be a
little fiddly until you get used to it.
Marking the cover
^^^^^^^^^^^^^^^^^^^^^^^^^^^


@ -48,16 +48,16 @@ class ArsTechnica(BasicNewsRecipe):
]
remove_attributes = ['lang']
feeds = [
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/' )
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/' )
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/' )
(u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/')
,(u'Opposable Thumbs (Gaming content)' , u'http://feeds.arstechnica.com/arstechnica/gaming/')
,(u'Gear and Gadgets' , u'http://feeds.arstechnica.com/arstechnica/gadgets/')
,(u'Uptime (IT content)' , u'http://feeds.arstechnica.com/arstechnica/business/')
,(u'Open Ended (Open Source content)' , u'http://feeds.arstechnica.com/arstechnica/open-source/')
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/' )
,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/' )
,(u'One Microsoft Way' , u'http://feeds.arstechnica.com/arstechnica/microsoft/')
,(u'Scientific method (Science content)' , u'http://feeds.arstechnica.com/arstechnica/science/')
,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
,(u'Risk Assessment (Security content)' , u'http://feeds.arstechnica.com/arstechnica/security/')
]
def append_page(self, soup, appendtag, position):
@ -75,7 +75,6 @@ class ArsTechnica(BasicNewsRecipe):
pager.extract()
appendtag.insert(position,texttag)
def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3)
for item in soup.findAll('a'):
@ -91,10 +90,9 @@ class ArsTechnica(BasicNewsRecipe):
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
if 'alt' not in item:
item['alt'] = 'image'
return soup
def preprocess_raw_html(self, raw, url):
return '<html><head>'+raw[raw.find('</head>'):]


@ -5,18 +5,18 @@ import re
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham Evening Mail'
description = 'News for Birmingham UK'
#timefmt = ''
# timefmt = ''
__author__ = 'Dave Asbury'
# v1 21/12/13
# 1/5/14
masthead_url = 'http://images.icnetwork.co.uk/upl/icbirmingham/apr2004/6/5/0007417F-982A-107F-969980BFB6FA0000.jpg'
oldest_article = 1
oldest_article = 2
max_articles_per_feed = 10
#linearize_tables = True
# linearize_tables = True
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
remove_attributes = ['style']
#auto_cleanup = True
# auto_cleanup = True
language = 'en_GB'
compress_news_images = True
compress_news_images_max_size = 30
@ -26,8 +26,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
dict(attrs={'class' : 'gallery-data'}),
dict(attrs={'class' : 'ir btn-fullscreen'}),
dict(attrs={'class' : 'tools clearfix'}),
dict(attrs={'class' : 'shareButtons'}),
]
keep_only_tags = [
dict(name='h1'),
dict(attrs={'class' : 'lead-text'}),
# dict(attrs={'class' : 'styleGroup article-header'}),
# dict(attrs={'class' : 'body '}),
dict(attrs={'class' : 'tmCol article'}),]
feeds = [
@ -38,11 +44,15 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
(u'Lifestyle',u'http://www.birminghammail.co.uk/lifestyle/rss.xml'),
]
extra_css = '''
h1{font-weight:bold;font-size: 175%;}
h1{font-weight:bold;}
h2{font-weight:normal;font-size:75%;}
figure {font-size:50%;}
#body{font-size:14px;}
#.photo-caption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;}
#.publish-info {font-size:50%;}
img {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:50%;}
'''
def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghammail.co.uk')
cov = soup.find(attrs={'src' : re.compile('http://images.icnetwork.co.uk/upl/birm')})


@ -7,80 +7,70 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
description = 'News as provided by The Daily Mirror -UK'
__author__ = 'Dave Asbury'
# last updated 27/8/13
# last updated 1/5/14
language = 'en_GB'
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
# cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
#recursions = 10
compress_news_images = True
compress_news_images_max_size = 30
oldest_article = 1.5
max_articles_per_feed = 10
max_articles_per_feed = 12
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
ignore_duplicate_articles = {'url'}
ignore_duplicate_articles = {'title'}
#auto_cleanup = True
#conversion_options = { 'linearize_tables' : True }
keep_only_tags = [dict(name='h1'),
keep_only_tags = [
dict(name='h1'),
dict(name='div',attrs={'class' : 'lead-text'}),
dict(attrs={'class' : 'tools clearfix'}),
dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}),
# dict(name='figure',attrs={'class' : 'clearfix'}),
dict(name='figure',attrs={'class' : 'inline-image clearfix '}),
dict(name='div',attrs={'class' : 'styleGroup clearfix'}),
dict(name='div',attrs={
'class' : 'widget relatedContents pictures widget-editable viziwyg-section-70 inpage-widget-2230659'}),
dict(name='div',attrs={'class' :'body '}),
dict(name='div',attrs={'class' :'thumb'}),
dict(attrs={'img alt' : ['Perishers','Horace']}),
#dict(attrs={'class' : 'tmRow span-15-5 col-1 article-page'}),
#dict(attrs={'class' : ['article-attr','byline append-1','published']}),
# dict(name='p'),
]
remove_tags = [
dict(attrs={'class' : ['article sa-teaser type-opinion','last','gallery-caption','gallery-data','ir btn-fullscreen','avatar']}), # ,'image-gallery'
dict(attrs={'class' : ['article sa-teaser type-opinion','image-gallery','gallery-caption']}),
dict(attrs={'class' : 'comment'}),
dict(name='title'),
dict(name='ul'),
dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}),
dict(name='ul',attrs={'id' : 'login-201109171215'}),
#'widget navigation breadcrumb widget-editable viziwyg-section-198 inpage-widget-80721 span-17','image-credit'
dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
]
preprocess_regexps = [
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: ''),
(re.compile(r'<span class="image-credit">Getty</span>', re.IGNORECASE | re.DOTALL), lambda match: ''),
]
feeds = [
(u'News',u'http://www.mirror.co.uk/news/rss.xml'),
(u'UK News',u'http://www.mirror.co.uk/news/uk-news/rss.xml'),
(u'world News',u'http://www.mirror.co.uk/news/world-news/rss.xml'),
(u'Sports',u'http://www.mirror.co.uk/sport/rss.xml'),
(u'3AM',u'http://www.mirror.co.uk/3am/rss.xml'),
(u'Lifestyle',u'http://www.mirror.co.uk/lifestyle/rss.xml')
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:170%;}
.article figure figcaption {display: block;margin-left: auto;margin-right: auto;
width:100%;font-family:Arial,Helvetica,sans-serif;font-size:40%;}
#h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;}
p{font-family:Arial,Helvetica,sans-serif;}
body{font-family:Helvetica,Arial,sans-serif;}
.article figure{display: block;margin-left: auto;margin-right: auto;width:100%;}
.lead-text p {font-size:150%}
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def get_cover_url(self):
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
# look for the block containing the mirror button and url
# look for the block containing the mirror button and url
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'})
cov2 = str(cov)
cov2='http://www.politicshome.com'+cov2[9:-142]
# cov2 now contains url of the page containing pic
# cov2 now contains url of the page containing pic
soup = self.index_to_soup(cov2)
cov = soup.find(attrs={'id' : 'large'})
cov=str(cov)
@ -96,7 +86,4 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
except:
cover_url ='http://profile.ak.fbcdn.net/hprofile-ak-snc4/373019_6149699161_1710984811_n.jpg'
# print '******** string is ', cov2,' ***'
#cover_url = cov2
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
return cover_url

Binary file not shown. (new image, 2.8 KiB)


@ -3,15 +3,16 @@
# Based on veezh's original recipe, Kovid Goyal's New York Times recipe and Snaabs nrc Handelsblad recipe
__license__ = 'GPL v3'
__copyright__ = '2013, Niels Giesen'
__copyright__ = '2014, Niels Giesen'
'''
www.nrc.nl
'''
import os, zipfile
import time
import os, zipfile, re
from io import BytesIO
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from datetime import date, timedelta
class NRCNext(BasicNewsRecipe):
@ -19,8 +20,8 @@ class NRCNext(BasicNewsRecipe):
title = u'nrc•next'
description = u'De ePaper-versie van nrc•next'
language = 'nl'
lang = 'nl-NL'
needs_subscription = True
requires_version = (1, 24, 0)
__author__ = 'Niels Giesen'
@ -28,48 +29,45 @@ class NRCNext(BasicNewsRecipe):
'no_default_epub_cover' : True
}
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username is not None and self.password is not None:
br.open('http://login.nrc.nl/login')
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def build_index(self):
today = time.strftime("%Y%m%d")
domain = "http://digitaleeditie.nrc.nl"
url = domain + "/digitaleeditie/helekrant/epub/nn_" + today + ".epub"
#print url
from calibre.web.jsbrowser.browser import Browser, ElementNotFound
br = Browser()
br.visit('http://login.nrc.nl/login', timeout=60)
f = br.select_form('#command')
f['username'] = self.username
f['password'] = self.password
br.submit()
raw = br.html
if '>log out<' not in raw:
raise ValueError('Failed to login, check username and password')
epubraw = None
for today in (date.today(), date.today() - timedelta(days=1),):
url = 'http://digitaleeditie.nrc.nl/digitaleeditie/NN/%s/%d/%s___/downloads.html' % (today.strftime('%Y'), today.month - 1, today.strftime('%Y%m%d'))
self.log('Trying to download epub from:', url)
br.start_load(url, timeout=60)
try:
br = self.get_browser()
f = br.open(url)
except:
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
epubraw = br.download_file('#CompleteDownloads .download-list .download-button')
break
except ElementNotFound:
self.log('%r not available yet' % url)
continue
if epubraw is None:
raise ValueError('Krant van vandaag nog niet beschikbaar')
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(f.read())
f.close()
br.close()
if zipfile.is_zipfile(tmp):
try:
zfile = zipfile.ZipFile(tmp.name, 'r')
zfile = zipfile.ZipFile(BytesIO(epubraw), 'r')
zfile.extractall(self.output_dir)
self.report_progress(0,_('extracting epub'))
except zipfile.BadZipfile:
self.report_progress(0,_('BadZip error, continuing'))
tmp.close()
namelist = zfile.namelist()
emre = re.compile("&lt;em(?:.*)&gt;(.*)&lt;/em&gt;")
subst = '\\1'
for name in namelist:
_, ext = os.path.splitext(name);
if (ext == '.html') or (ext == '.ncx'):
fname = os.path.join(self.output_dir, name)
with open(fname) as f:
s = f.read()
s = emre.sub(subst, s)
with open(fname, 'w') as f:
f.write(s)
index = os.path.join(self.output_dir, 'metadata.opf')
self.report_progress(1,_('epub downloaded and extracted'))
return index


@ -47,7 +47,6 @@ class Nzz(BasicNewsRecipe):
# print soup.prettify()
articles = {}
key = None
sections = []
ans = []
issue = soup.find("link",rel="prefetch")


@ -1,31 +1,38 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1359406781(BasicNewsRecipe):
title = u'Private Eye'
oldest_article = 15
publication_type = 'magazine'
description = u'Private Eye is a fortnightly British satirical and current affairs magazine, edited by Ian Hislop'
oldest_article = 13
max_articles_per_feed = 100
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
ignore_duplicate_articles = {'title'}
language = 'en_GB'
__author__ = 'Martyn Pritchard'
encoding = 'iso-8859-1'
compress_news_images = True
compress_news_images_auto_size = 8
scale_news_images_to_device = False
scale_news_images = (220, 300)
__author__ = u'MartynPritchard@yahoo.com'
__copyright__ = '2014, Martyn Pritchard <MartynPritchard@yahoo.com>'
def get_cover_url(self):
soup = self.index_to_soup('http://www.private-eye.co.uk')
cov = soup.find(attrs={'width' : '180', 'border' : '0'})
cover_url = 'http://www.private-eye.co.uk/'+cov['src']
cover_url = None
soup = self.index_to_soup('http://www.private-eye.co.uk/current_issue.php')
for citem in soup.findAll('img'):
if citem['src'].endswith('big.jpg'):
return 'http://www.private-eye.co.uk/' + citem['src']
return cover_url
keep_only_tags = [dict(name='table', attrs={'width':['100%'], 'border':['0'], 'align': ['center'], 'cellspacing':['0'], 'cellpadding':['0']}),
dict(name='table', attrs={'width':['480'], 'cellspacing':['0'], 'cellpadding':['0']}),
dict(name='table', attrs={'width':['490'], 'border':['0'], 'align': ['left'], 'cellspacing':['0'], 'cellpadding':['1']}),
dict(name='table', attrs={'width':['500'], 'cellspacing':['0'], 'cellpadding':['0']}),
remove_tags_before = {'class':"sub_dave"}
remove_tags = [dict(name='td', attrs={'class':'sub_dave'})]
preprocess_regexps = [
(re.compile(r'../grfx', re.DOTALL|re.IGNORECASE), lambda match: 'http://www.private-eye.co.uk/grfx'),
(re.compile(r'More From This Issue.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
(re.compile(r'More top stories in the latest issue:.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
(re.compile(r'Also Available Online.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
]
feeds = [(u'Private Eye', u'http://www.private-eye.co.uk/rss/rss.php')]

Binary file not shown. (image changed: 24 KiB before, 23 KiB after)


@ -28,9 +28,8 @@ bit.
(First check if the version of VS 2008 you have is not already SP1)
3) Install The Windows SDK. You need to install a version that is built for VS
2008. Get it from here: http://www.microsoft.com/en-us/download/details.aspx?id=3138
4) If you are building 64bit, edit the properties of the Visual Studio command
prompt shortcut to pass "amd64" instead of "x86" to the vsvars.bat file so that
it uses the 64 bit tools.
4) If you are building 64bit, remember to use the 64bit version of the visual
studio command prompt.
I've read that it is possible to use the 64-bit compiler that comes with the
Windows SDK With VS 2008 Express Edition, but I can't be bothered figuring it


@ -81,7 +81,7 @@ class Manual(Command):
os.makedirs('.build'+os.sep+'html')
os.environ['__appname__'] = __appname__
os.environ['__version__'] = __version__
subprocess.check_call(['sphinx-build', '-b', 'html', '-t', 'online',
subprocess.check_call(['sphinx-build2', '-b', 'html', '-t', 'online',
'-d', '.build/doctrees', '.', '.build/html'])
subprocess.check_call(['sphinx-build', '-b', 'myepub', '-d',
'.build/doctrees', '.', '.build/epub'])


@ -13,10 +13,11 @@ from functools import partial
from setup import Command, __appname__, __version__, require_git_master
def qt_sources():
qtdir = glob.glob('/usr/src/qt-*')[-1]
# QT5XX: Change this
qtdir = '/usr/src/qt4'
j = partial(os.path.join, qtdir)
return list(map(j, [
'src/gui/widgets/qdialogbuttonbox.cpp',
'gui/widgets/qdialogbuttonbox.cpp',
]))
class POT(Command): # {{{
@ -152,7 +153,8 @@ class Translations(POT): # {{{
def run(self, opts):
l = {}
execfile(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py'), l, l)
exec(compile(open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py'))
.read(), os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py'), 'exec'), l, l)
lcdata = {k:{k1:v1 for k1, v1 in v} for k, v in l['data']}
self.iso639_errors = []
for f in self.po_files():
@ -308,14 +310,14 @@ class GetTranslations(Translations): # {{{
os.mkdir(errors)
pofilter = ('pofilter', '-i', self.LP_PATH, '-o', errors,
'-t', 'accelerators', '-t', 'escapes', '-t', 'variables',
#'-t', 'xmltags',
#'-t', 'brackets',
#'-t', 'emails',
#'-t', 'doublequoting',
#'-t', 'filepaths',
#'-t', 'numbers',
# '-t', 'xmltags',
# '-t', 'brackets',
# '-t', 'emails',
# '-t', 'doublequoting',
# '-t', 'filepaths',
# '-t', 'numbers',
'-t', 'options',
#'-t', 'urls',
# '-t', 'urls',
'-t', 'printf')
subprocess.check_call(pofilter)
errfiles = glob.glob(errors+os.sep+'*.po')


@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (1, 34, 0)
numeric_version = (1, 35, 0)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"


@ -27,7 +27,8 @@ from calibre.utils.icu import sort_key
from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp, parse_date
from calibre.utils.filenames import (
is_case_sensitive, samefile, hardlink_file, ascii_filename, WindowsAtomicFolderMove, atomic_rename)
is_case_sensitive, samefile, hardlink_file, ascii_filename,
WindowsAtomicFolderMove, atomic_rename, remove_dir_if_empty)
from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
@ -1184,8 +1185,7 @@ class DB(object):
Read all data from the db into the python in-memory tables
'''
with self.conn: # Use a single transaction, to ensure nothing modifies
# the db while we are reading
with self.conn: # Use a single transaction, to ensure nothing modifies the db while we are reading
for table in self.tables.itervalues():
try:
table.read(self)
@ -1538,11 +1538,7 @@ class DB(object):
if permanent:
for path in paths:
self.rmtree(path)
try:
os.rmdir(os.path.dirname(path))
except OSError as e:
if e.errno != errno.ENOTEMPTY:
raise
remove_dir_if_empty(os.path.dirname(path), ignore_metadata_caches=True)
else:
delete_service().delete_books(paths, self.library_path)
@ -1667,5 +1663,3 @@ class DB(object):
vals = [(book_id, fmt, size, name) for fmt, size, name in formats]
self.executemany('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', vals)
# }}}


@ -11,6 +11,7 @@ from threading import Thread
from Queue import Queue
from calibre.ptempfile import remove_dir
from calibre.utils.filenames import remove_dir_if_empty
from calibre.utils.recycle_bin import delete_tree, delete_file
class DeleteService(Thread):
@ -93,10 +94,10 @@ class DeleteService(Thread):
time.sleep(1)
shutil.move(path, dest)
if delete_empty_parent:
self.remove_dir_if_empty(os.path.dirname(path))
remove_dir_if_empty(os.path.dirname(path), ignore_metadata_caches=True)
requests.append(dest)
if not requests:
self.remove_dir_if_empty(tdir)
remove_dir_if_empty(tdir)
else:
self.requests.put(tdir)


@ -814,7 +814,7 @@ class Search(object):
def _update_caches(self, sqp, book_ids):
book_ids = sqp.all_book_ids = set(book_ids)
remove = set()
for query, result in self.cache:
for query, result in tuple(self.cache):
try:
matches = sqp.parse(query)
except ParseException:


@ -231,6 +231,8 @@ class AddRemoveTest(BaseTest):
fmtpath = cache.format_abspath(1, 'FMT1')
bookpath = os.path.dirname(fmtpath)
authorpath = os.path.dirname(bookpath)
os.mkdir(os.path.join(authorpath, '.DS_Store'))
open(os.path.join(authorpath, 'Thumbs.db'), 'wb').close()
item_id = {v:k for k, v in cache.fields['#series'].table.id_map.iteritems()}['My Series Two']
cache.remove_books((1,), permanent=True)
for x in (fmtpath, bookpath, authorpath):


@ -233,15 +233,37 @@ class libiMobileDevice():
dst: file to be created on iOS filesystem
'''
self._log_location("src:{0} dst:{1}".format(repr(src), repr(dst)))
mode = 'rb'
with open(src, mode) as f:
content = bytearray(f.read())
BUFFER_SIZE = 10 * 1024 * 1024
mode = 'wb'
handle = self._afc_file_open(str(dst), mode=mode)
handle = self._afc_file_open(str(dst), mode='wb')
if handle is not None:
success = self._afc_file_write(handle, content, mode=mode)
# Get the file size
file_stats = os.stat(src)
file_size = file_stats.st_size
self._log("file_size: {:,} bytes".format(file_size))
if file_size > BUFFER_SIZE:
bytes_remaining = file_size
with open(src, 'rb') as f:
while bytes_remaining:
if bytes_remaining > BUFFER_SIZE:
self._log("copying {:,} byte chunk".format(BUFFER_SIZE))
content = bytearray(f.read(BUFFER_SIZE))
success = self._afc_file_write(handle, content, mode='wb')
bytes_remaining -= BUFFER_SIZE
else:
self._log("copying final {:,} bytes".format(bytes_remaining))
content = bytearray(f.read(bytes_remaining))
success = self._afc_file_write(handle, content, mode='wb')
bytes_remaining = 0
self._log(" success: {0}".format(success))
else:
with open(src, 'rb') as f:
content = bytearray(f.read())
handle = self._afc_file_open(str(dst), mode='wb')
if handle is not None:
success = self._afc_file_write(handle, content, mode='wb')
self._log(" success: {0}".format(success))
self._afc_file_close(handle)
else:
self._log(" could not create copy")


@ -525,10 +525,15 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
# Network functions
def _read_binary_from_net(self, length):
try:
self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
v = self.device_socket.recv(length)
self.device_socket.settimeout(None)
return v
except:
self._close_device_socket()
raise
def _read_string_from_net(self):
data = bytes(0)
@ -556,23 +561,30 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
def _send_byte_string(self, sock, s):
if not isinstance(s, bytes):
self._debug('given a non-byte string!')
self._close_device_socket()
raise PacketError("Internal error: found a string that isn't bytes")
sent_len = 0
total_len = len(s)
while sent_len < total_len:
try:
sock.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
if sent_len == 0:
amt_sent = sock.send(s)
else:
amt_sent = sock.send(s[sent_len:])
sock.settimeout(None)
if amt_sent <= 0:
raise IOError('Bad write on socket')
sent_len += amt_sent
except socket.error as e:
self._debug('socket error', e, e.errno)
if e.args[0] != EAGAIN and e.args[0] != EINTR:
self._close_device_socket()
raise
time.sleep(0.1) # lets not hammer the OS too hard
except:
self._close_device_socket()
raise
# This must be protected by a lock because it is called from the GUI thread
# (the sync stuff) and the device manager thread
@ -592,7 +604,6 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
s = self._json_encode(self.opcodes[op], arg)
if print_debug_info and extra_debug:
self._debug('send string', s)
self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
self._send_byte_string(self.device_socket, (b'%d' % len(s)) + s)
if not wait_for_response:
return None, None
@ -617,7 +628,6 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
extra_debug = self.settings().extra_customization[self.OPT_EXTRA_DEBUG]
try:
v = self._read_string_from_net()
self.device_socket.settimeout(None)
if print_debug_info and extra_debug:
self._debug('received string', v)
if v:
@ -655,10 +665,10 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
'metadata': book_metadata, 'thisBook': this_book,
'totalBooks': total_books,
'willStreamBooks': True,
'willStreamBinary' : True},
'willStreamBinary' : True,
'wantsSendOkToSendbook' : self.can_send_ok_to_sendbook},
print_debug_info=False,
wait_for_response=False)
wait_for_response=self.can_send_ok_to_sendbook)
self._set_known_metadata(book_metadata)
pos = 0
failed = False
@ -1029,6 +1039,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug('Device can use cached metadata', self.client_can_use_metadata_cache)
self.client_cache_uses_lpaths = result.get('cacheUsesLpaths', False)
self._debug('Cache uses lpaths', self.client_cache_uses_lpaths)
self.can_send_ok_to_sendbook = result.get('canSendOkToSendbook', False)
self._debug('Can send OK to sendbook', self.can_send_ok_to_sendbook)
if not self.settings().extra_customization[self.OPT_USE_METADATA_CACHE]:
self.client_can_use_metadata_cache = False


@ -15,6 +15,7 @@ import os, subprocess, time, re, sys, glob
from itertools import repeat
from calibre import prints, as_unicode
from calibre.constants import DEBUG
from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import DeviceError
from calibre.devices.usbms.deviceconfig import DeviceConfig
@ -615,6 +616,8 @@ class Device(DeviceConfig, DevicePlugin):
'the device has already been ejected, or your '
'kernel is exporting a deprecated version of SYSFS.')
%self.__class__.__name__)
if DEBUG:
print '\nFound device nodes:', main, carda, cardb
self._linux_mount_map = {}
mp, ret = mount(main, 'main')
@ -659,6 +662,8 @@ class Device(DeviceConfig, DevicePlugin):
os.remove(path)
except:
pass
if DEBUG and ro:
print '\nThe mountpoint', mp, 'is readonly, ignoring it'
return ro
for mp in ('_main_prefix', '_card_a_prefix', '_card_b_prefix'):
@ -722,7 +727,7 @@ class Device(DeviceConfig, DevicePlugin):
d.serial == objif.GetProperty('usb.serial'):
dpaths = manager.FindDeviceStringMatch('storage.originating_device', path)
for dpath in dpaths:
#devif = dbus.Interface(bus.get_object('org.freedesktop.Hal', dpath), 'org.freedesktop.Hal.Device')
# devif = dbus.Interface(bus.get_object('org.freedesktop.Hal', dpath), 'org.freedesktop.Hal.Device')
try:
vpaths = manager.FindDeviceStringMatch('block.storage_device', dpath)
for vpath in vpaths:
@ -740,13 +745,13 @@ class Device(DeviceConfig, DevicePlugin):
'vol': volif,
'label': vdevif.GetProperty('volume.label')}
vols.append(vol)
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print e
continue
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print e
continue
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
continue
def ocmp(x,y):
@ -779,7 +784,7 @@ class Device(DeviceConfig, DevicePlugin):
print "ERROR: Timeout waiting for mount to complete"
continue
mp = vol['dev'].GetProperty('volume.mount_point')
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print "Failed to mount ", e
continue
@ -828,7 +833,7 @@ class Device(DeviceConfig, DevicePlugin):
print "FBSD: umount main:", self._main_prefix
try:
self._main_vol.Unmount([])
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print 'Unable to eject ', e
if self._card_a_prefix:
@ -836,7 +841,7 @@ class Device(DeviceConfig, DevicePlugin):
print "FBSD: umount card a:", self._card_a_prefix
try:
self._card_a_vol.Unmount([])
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print 'Unable to eject ', e
if self._card_b_prefix:
@ -844,7 +849,7 @@ class Device(DeviceConfig, DevicePlugin):
print "FBSD: umount card b:", self._card_b_prefix
try:
self._card_b_vol.Unmount([])
except dbus.exceptions.DBusException, e:
except dbus.exceptions.DBusException as e:
print 'Unable to eject ', e
self._main_prefix = None


@ -31,7 +31,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
'textile', 'markdown', 'ibook', 'iba', 'azw3', 'ps']
'textile', 'markdown', 'ibook', 'iba', 'azw3', 'ps', 'kepub']
class HTMLRenderer(object):


@ -77,7 +77,7 @@ class Plumber(object):
def __init__(self, input, output, log, report_progress=DummyReporter(),
dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
override_input_metadata=False, for_regex_wizard=False):
override_input_metadata=False, for_regex_wizard=False, view_kepub=False):
'''
:param input: Path to input file.
:param output: Path to output file/directory
@ -702,6 +702,8 @@ OptionRecommendation(name='search_replace',
if not input_fmt:
raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower().replace('original_', '')
if view_kepub and input_fmt.lower() == 'kepub':
input_fmt = 'epub'
self.archive_input_tdir = None
if input_fmt in ARCHIVE_FMTS:
self.log('Processing archive...')


@ -119,7 +119,7 @@ def process_index(field, index, xe_fields, log):
return hyperlinks, blocks
def split_up_block(block, a, text, parts):
def split_up_block(block, a, text, parts, ldict):
prefix = parts[:-1]
a.text = parts[-1]
parent = a.getparent()
@ -127,31 +127,100 @@ def split_up_block(block, a, text, parts):
for i, prefix in enumerate(prefix):
m = 1.5 * i
span = parent.makeelement('span', style=style % m)
ldict[span] = i
parent.append(span)
span.text = prefix
span = parent.makeelement('span', style=style % ((i + 1) * 1.5))
parent.append(span)
span.append(a)
ldict[span] = len(prefix)
def merge_blocks(prev_block, next_block, prev_path, next_path):
pa, na = prev_block.xpath('descendant::a'), next_block.xpath('descendant::a[1]')
if not pa or not na:
return
pa, na = pa[-1], na[0]
if prev_path == next_path:
"""
The merge algorithm is a little tricky.
We start with a list of elementary blocks. Each is an HtmlElement, a p node
with a list of child nodes. The last child is a link, and the earlier ones are
just text.
The list is in reverse order from what we want in the index.
There is a dictionary ldict which records the level of each child node.
Now we want to do a reduce-like operation, combining all blocks with the same
top level index entry into a single block representing the structure of all
references, subentries, etc. under that top entry.
Here's the algorithm.
Given a block p and the next block n, and the top level entries p1 and n1 in each
block, which we assume have the same text:
Start with (p, p1) and (n, n1).
Given (p, p1, ..., pk) and (n, n1, ..., nk) which we want to merge:
If there are no more levels in n, then add the link from nk to the links for pk.
This might be the first link for pk, or we might get a list of references.
Otherwise nk+1 is the next level in n. Look for a matching entry in p. It must have
the same text, it must follow pk, it must come before we find any other p entries at
the same level as pk, and it must have the same level as nk+1.
If we find such a matching entry, go back to the start with (p ... pk+1) and (n ... nk+1).
If there is no matching entry, then because of the original reversed order we want
to insert nk+1 and all following entries from n into p immediately following pk.
"""
def find_match(prev_block, pind, nextent, ldict):
curlevel = ldict[prev_block[pind]]
for p in range(pind+1, len(prev_block)):
trylev = ldict[prev_block[p]]
if trylev <= curlevel:
return -1
if trylev > (curlevel+1):
continue
if prev_block[p].text_content() == nextent.text_content():
return p
return -1
def add_link(pent, nent, ldict):
na = nent.xpath('descendant::a[1]')
na = na[0]
pa = pent.xpath('descendant::a')
if pa and len(pa) > 0:
# Put on same line with a comma
pa = pa[-1]
pa.tail = ', '
p = pa.getparent()
p.insert(p.index(pa) + 1, na)
else:
# Add a line to the previous block
ps, ns = pa.getparent(), na.getparent()
p = ps.getparent()
p.insert(p.index(ps) + 1, ns)
# substitute link na for plain text in pent
pent.text = ""
pent.append(na)
def merge_blocks(prev_block, next_block, pind, nind, next_path, ldict):
# First elements match. Any more in next?
if len(next_path) == (nind + 1):
nextent = next_block[nind]
add_link(prev_block[pind], nextent, ldict)
return
nind = nind + 1
nextent = next_block[nind]
prevent = find_match(prev_block, pind, nextent, ldict)
if prevent > 0:
merge_blocks(prev_block, next_block, prevent, nind, next_path, ldict)
return
# Want to insert elements into previous block
while nind < len(next_block):
# insert takes it out of old
pind = pind + 1
prev_block.insert(pind, next_block[nind])
next_block.getparent().remove(next_block)
def polish_index_markup(index, blocks):
# Blocks are in reverse order at this point
path_map = {}
ldict = {}
for block in blocks:
cls = block.get('class', '') or ''
block.set('class', (cls + ' index-entry').lstrip())
@ -162,20 +231,22 @@ def polish_index_markup(index, blocks):
if ':' in text:
path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
if len(parts) > 1:
split_up_block(block, a[0], text, parts)
split_up_block(block, a[0], text, parts, ldict)
else:
# try using a span all the time
path_map[block] = [text]
parent = a[0].getparent()
span = parent.makeelement('span', style='display:block; margin-left: 0em')
parent.append(span)
span.append(a[0])
ldict[span] = 0
# We want a single block for each main entry
prev_block = blocks[0]
for block in blocks[1:]:
pp, pn = path_map[prev_block], path_map[block]
if pp == pn:
merge_blocks(prev_block, block, pp, pn)
elif len(pp) > 1 and len(pn) >= len(pp):
if pn[:-1] in (pp[:-1], pp):
merge_blocks(prev_block, block, pp, pn)
# It's possible to have pn starting with pp but having more
# than one extra entry, but until I see that in the wild, I'm not
# going to bother
if pp[0] == pn[0]:
merge_blocks(prev_block, block, 0, 0, pn, ldict)
else:
prev_block = block


@ -61,7 +61,8 @@ def reverse_tag_iter(block):
class Mobi8Reader(object):
def __init__(self, mobi6_reader, log):
def __init__(self, mobi6_reader, log, for_tweak=False):
self.for_tweak = for_tweak
self.mobi6_reader, self.log = mobi6_reader, log
self.header = mobi6_reader.book_header
self.encrypted_fonts = []
@ -459,6 +460,20 @@ class Mobi8Reader(object):
def exclude(path):
return os.path.basename(path) == 'debug-raw.html'
# If there are no images then the azw3 input plugin dumps all
# binary records as .unknown images, remove them
if self.for_tweak and os.path.exists('images') and os.path.isdir('images'):
files = os.listdir('images')
unknown = [x for x in files if x.endswith('.unknown')]
if len(files) == len(unknown):
[os.remove('images/'+f) for f in files]
if self.for_tweak:
try:
os.remove('debug-raw.html')
except:
pass
opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
for entry in opf.manifest:
if entry.mime_type == 'text/html':
@ -539,4 +554,3 @@ class Mobi8Reader(object):
parent.add_item(href, frag, text)
current_depth = depth
return ans


@ -125,8 +125,8 @@ def iterlinks(root, find_links_in_css=True):
if tag == XHTML('object'):
codebase = None
## <object> tags have attributes that are relative to
## codebase
# <object> tags have attributes that are relative to
# codebase
if 'codebase' in attribs:
codebase = el.get('codebase')
yield (el, 'codebase', codebase, 0)
@ -604,8 +604,8 @@ class Metadata(object):
allowed = self.allowed
if allowed is not None and term not in allowed:
raise AttributeError(
'attribute %r not valid for metadata term %r'
% (self.attr(term), barename(obj.term)))
'attribute %r not valid for metadata term %r' % (
self.attr(term), barename(obj.term)))
return self.attr(term)
def __get__(self, obj, cls):


@ -14,7 +14,7 @@ from calibre.customize.ui import available_input_formats
def is_supported(path):
ext = os.path.splitext(path)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
return ext in available_input_formats()
return ext in available_input_formats() or ext == 'kepub'
class UnsupportedFormatError(Exception):


@ -75,7 +75,7 @@ class EbookIterator(BookmarksMixin):
return i
def __enter__(self, processed=False, only_input_plugin=False,
run_char_count=True, read_anchor_map=True,
run_char_count=True, read_anchor_map=True, view_kepub=False,
extract_embedded_fonts_for_qt=False):
''' Convert an ebook file into an exploded OEB book suitable for
display in viewers/preprocessing etc. '''
@ -85,7 +85,7 @@ class EbookIterator(BookmarksMixin):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber = Plumber(self.pathtoebook, self.base, self.log, view_kepub=view_kepub)
plumber.setup_options()
if self.pathtoebook.lower().endswith('.opf'):
plumber.opts.dont_package = True


@ -1022,20 +1022,9 @@ def do_explode(path, dest):
mr = MobiReader(stream, default_log, None, None)
with CurrentDir(dest):
mr = Mobi8Reader(mr, default_log)
mr = Mobi8Reader(mr, default_log, for_tweak=True)
opf = os.path.abspath(mr())
obfuscated_fonts = mr.encrypted_fonts
# If there are no images then the azw3 input plugin dumps all
# binary records as .unknown images, remove them
if os.path.exists('images') and os.path.isdir('images'):
files = os.listdir('images')
unknown = [x for x in files if x.endswith('.unknown')]
if len(files) == len(unknown):
[os.remove('images/'+f) for f in files]
try:
os.remove('debug-raw.html')
except:
pass
return opf, obfuscated_fonts


@ -419,7 +419,7 @@ def merge_css(container, names, master):
# Remove charset rules
cr = [r for r in sheet.cssRules if r.type == r.CHARSET_RULE]
[sheet.remove(r) for r in cr]
[sheet.deleteRule(sheet.cssRules.index(r)) for r in cr]
for rule in sheet.cssRules:
msheet.add(rule)


@ -126,3 +126,27 @@ def link_stylesheets(container, names, sheets, remove=False, mtype='text/css'):
container.dirty(name)
return changed_names
def lead_text(top_elem, num_words=10):
''' Return the leading text contained in top_elem (including descendants)
upto a maximum of num_words words. More efficient than using
etree.tostring(method='text') as it does not have to serialize the entire
sub-tree rooted at top_elem.'''
pat = re.compile(r'\s+', flags=re.UNICODE)
words = []
def get_text(x, attr='text'):
ans = getattr(x, attr)
if ans:
words.extend(filter(None, pat.split(ans)))
stack = [(top_elem, 'text')]
while stack and len(words) < num_words:
elem, attr = stack.pop()
get_text(elem, attr)
if attr == 'text':
if elem is not top_elem:
stack.append((elem, 'tail'))
stack.extend(reversed(list((c, 'text') for c in elem.iterchildren('*'))))
return ' '.join(words[:num_words])
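
As a quick illustration of the lead_text() helper added above, here is a minimal usage sketch. The lxml import and the sample markup are assumptions made for this example only; they are not part of the commit.

from lxml import html

# Hypothetical input fragment, just to show what the helper returns.
root = html.fromstring('<div><p>The <b>quick</b> brown fox jumps over the lazy dog</p></div>')
print(lead_text(root, num_words=5))  # -> 'The quick brown fox jumps'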


@ -158,7 +158,7 @@ def _config(): # {{{
help=_('Options for the LRF ebook viewer'))
c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT',
'MOBI', 'PRC', 'POBI', 'AZW', 'AZW3', 'HTML', 'FB2', 'PDB', 'RB',
'SNB', 'HTMLZ'], help=_(
'SNB', 'HTMLZ', 'KEPUB'], help=_(
'Formats that are viewed using the internal viewer'))
c.add_opt('column_map', default=ALL_COLUMNS,
help=_('Columns to be displayed in the book list'))
@ -854,8 +854,8 @@ def setup_gui_option_parser(parser):
help='Detach from the controlling terminal, if any (linux only)')
def detach_gui():
if islinux and not DEBUG and sys.stdout.isatty():
# We are a GUI process running in a terminal so detach from the controlling terminal
if islinux and not DEBUG:
# Detach from the controlling process.
if os.fork() != 0:
raise SystemExit(0)
os.setsid()


@ -830,6 +830,7 @@ class Boss(QObject):
if self.doing_terminal_save:
prints(tb, file=sys.stderr)
return
self.gui.action_save.setEnabled(True)
error_dialog(self.gui, _('Could not save'),
_('Saving of the book failed. Click "Show Details"'
' for more information. You can try to save a copy'


@ -28,9 +28,9 @@ class QtHighlighter(QTextDocument):
QTextDocument.__init__(self, parent)
self.l = QPlainTextDocumentLayout(self)
self.setDocumentLayout(self.l)
self.highlighter = hlclass(self)
self.highlighter = hlclass()
self.highlighter.apply_theme(get_theme())
self.highlighter.setDocument(self)
self.highlighter.set_document(self)
self.setPlainText(text)
def copy_lines(self, lo, hi, cursor):


@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.Qt import QTextCharFormat
from PyQt4.Qt import QTextCharFormat, QFont
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import guess_type
@ -41,3 +41,9 @@ class SyntaxTextCharFormat(QTextCharFormat):
QTextCharFormat.__init__(self, *args)
self.setProperty(SYNTAX_PROPERTY, True)
def __repr__(self):
return 'SyntaxFormat(id=%s, color=%s, italic=%s, bold=%s)' % (
id(self), self.foreground().color().name(), self.fontItalic(), self.fontWeight() >= QFont.DemiBold)
__str__ = __repr__


@ -24,8 +24,8 @@ class Tag(object):
def __init__(self, start_block, tag_start, end_block, tag_end, self_closing=False):
self.start_block, self.end_block = start_block, end_block
self.start_offset, self.end_offset = tag_start.offset, tag_end.offset
tag = tag_start.name or tag_start.prefix
if tag_start.name and tag_start.prefix:
tag = tag_start.name
if tag_start.prefix:
tag = tag_start.prefix + ':' + tag
self.name = tag
self.self_closing = self_closing
@ -101,8 +101,8 @@ def find_tag_definition(block, offset):
return None, False
tag_start = boundary
closing = tag_start.closing
tag = tag_start.name or tag_start.prefix
if tag_start.name and tag_start.prefix:
tag = tag_start.name
if tag_start.prefix:
tag = tag_start.prefix + ':' + tag
return tag, closing


@ -6,45 +6,61 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.Qt import (QSyntaxHighlighter, QApplication, QCursor, Qt)
from collections import defaultdict
from PyQt4.Qt import (
QTextCursor, pyqtSlot, QTextBlockUserData, QTextLayout)
from ..themes import highlight_to_char_format
from calibre.gui2.tweak_book.widgets import BusyCursor
def run_loop(user_data, state_map, formats, text):
state = user_data.state
i = 0
seen_states = defaultdict(set)
while i < len(text):
orig_i = i
seen_states[i].add(state.parse)
fmt = state_map[state.parse](state, text, i, formats, user_data)
for num, f in fmt:
if num > 0:
yield i, num, f
i += num
if orig_i == i and state.parse in seen_states[i]:
# Something went wrong in the syntax highlighter
print ('Syntax highlighter returned a zero length format, parse state:', state.parse)
break
class SimpleState(object):
def __init__(self, value):
self.parse = value
__slots__ = ('parse',)
@property
def value(self):
return self.parse
def __init__(self):
self.parse = 0
def run_loop(state, state_map, formats, text):
i = 0
while i < len(text):
fmt = state_map[state.parse](state, text, i, formats)
for num, f in fmt:
yield i, num, f
i += num
def copy(self):
s = SimpleState()
s.parse = self.parse
return s
class SyntaxHighlighter(QSyntaxHighlighter):
class SimpleUserData(QTextBlockUserData):
def __init__(self):
QTextBlockUserData.__init__(self)
self.state = SimpleState()
def clear(self, state=None):
self.state = SimpleState() if state is None else state
class SyntaxHighlighter(object):
state_map = {0:lambda state, text, i, formats:[(len(text), None)]}
create_formats_func = lambda highlighter: {}
spell_attributes = ()
tag_ok_for_spell = lambda x: False
user_data_factory = SimpleUserData
def __init__(self, *args, **kwargs):
QSyntaxHighlighter.__init__(self, *args, **kwargs)
def create_state(self, num):
return SimpleState(max(0, num))
def rehighlight(self):
self.outlineexplorer_data = {}
QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
QSyntaxHighlighter.rehighlight(self)
QApplication.restoreOverrideCursor()
def __init__(self):
self.doc = None
def apply_theme(self, theme):
self.theme = {k:highlight_to_char_format(v) for k, v in theme.iteritems()}
@ -54,20 +70,93 @@ class SyntaxHighlighter(QSyntaxHighlighter):
def create_formats(self):
self.formats = self.create_formats_func()
def highlightBlock(self, text):
try:
state = self.previousBlockState()
self.setCurrentBlockUserData(None) # Ensure that any stale user data is discarded
state = self.create_state(state)
state.get_user_data, state.set_user_data = self.currentBlockUserData, self.setCurrentBlockUserData
for i, num, fmt in run_loop(state, self.state_map, self.formats, unicode(text)):
if fmt is not None:
self.setFormat(i, num, fmt)
self.setCurrentBlockState(state.value)
except:
import traceback
traceback.print_exc()
finally:
# Disabled as it causes crashes
pass # QApplication.processEvents() # Try to keep the editor responsive to user input
def set_document(self, doc):
old_doc = self.doc
if old_doc is not None:
old_doc.contentsChange.disconnect(self.reformat_blocks)
c = QTextCursor(old_doc)
c.beginEditBlock()
blk = old_doc.begin()
while blk.isValid():
blk.layout().clearAdditionalFormats()
blk = blk.next()
c.endEditBlock()
self.doc = None
if doc is not None:
self.doc = doc
doc.contentsChange.connect(self.reformat_blocks)
self.rehighlight()
def rehighlight(self):
doc = self.doc
if doc is None:
return
lb = doc.lastBlock()
with BusyCursor():
self.reformat_blocks(0, 0, lb.position() + lb.length())
def get_user_data(self, block):
ud = block.userData()
new_data = False
if ud is None:
ud = self.user_data_factory()
block.setUserData(ud)
new_data = True
return ud, new_data
@pyqtSlot(int, int, int)
def reformat_blocks(self, position, removed, added):
doc = self.doc
if doc is None:
return
last_block = doc.findBlock(position + added + (1 if removed > 0 else 0))
if not last_block.isValid():
last_block = doc.lastBlock()
end_pos = last_block.position() + last_block.length()
force_next_highlight = False
doc.contentsChange.disconnect(self.reformat_blocks)
try:
block = doc.findBlock(position)
while block.isValid() and (block.position() < end_pos or force_next_highlight):
ud, new_ud = self.get_user_data(block)
orig_state = ud.state
pblock = block.previous()
if pblock.isValid():
start_state = pblock.userData()
if start_state is None:
start_state = self.user_data_factory().state
else:
start_state = start_state.state.copy()
else:
start_state = self.user_data_factory().state
ud.clear(state=start_state) # Ensure no stale user data lingers
formats = []
for i, num, fmt in run_loop(ud, self.state_map, self.formats, unicode(block.text())):
if fmt is not None:
formats.append((i, num, fmt))
self.apply_format_changes(doc, block, formats)
force_next_highlight = new_ud or ud.state != orig_state
block = block.next()
finally:
doc.contentsChange.connect(self.reformat_blocks)
def apply_format_changes(self, doc, block, formats):
layout = block.layout()
preedit_start = layout.preeditAreaPosition()
preedit_length = layout.preeditAreaText().length()
ranges = []
R = QTextLayout.FormatRange
for i, num, fmt in formats:
# Adjust range by pre-edit text, if any
if preedit_start != 0:
if i >= preedit_start:
i += preedit_length
elif i + num >= preedit_start:
num += preedit_length
r = R()
r.start, r.length, r.format = i, num, fmt
ranges.append(r)
layout.setAdditionalFormats(ranges)
doc.markContentsDirty(block.position(), block.length())


@ -8,6 +8,8 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from PyQt4.Qt import QTextBlockUserData
from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat
from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter
@ -118,41 +120,63 @@ content_tokens = [(re.compile(k), v, n) for k, v, n in [
]]
class State(object):
NORMAL = 0
IN_COMMENT_NORMAL = 1
IN_SQS = 2
IN_DQS = 3
IN_CONTENT = 4
IN_COMMENT_CONTENT = 5
NORMAL = 0
IN_COMMENT_NORMAL = 1
IN_SQS = 2
IN_DQS = 3
IN_CONTENT = 4
IN_COMMENT_CONTENT = 5
class CSSState(object):
def __init__(self, num):
self.parse = num & 0b1111
self.blocks = num >> 4
__slots__ = ('parse', 'blocks')
@property
def value(self):
return ((self.parse & 0b1111) | (max(0, self.blocks) << 4))
def __init__(self):
self.parse = NORMAL
self.blocks = 0
def copy(self):
s = CSSState()
s.parse, s.blocks = self.parse, self.blocks
return s
def normal(state, text, i, formats):
def __eq__(self, other):
return self.parse == getattr(other, 'parse', -1) and \
self.blocks == getattr(other, 'blocks', -1)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "CSSState(parse=%s, blocks=%s)" % (self.parse, self.blocks)
__str__ = __repr__
class CSSUserData(QTextBlockUserData):
def __init__(self):
QTextBlockUserData.__init__(self)
self.state = CSSState()
def clear(self, state=None):
self.state = CSSState() if state is None else state
def normal(state, text, i, formats, user_data):
' The normal state (outside content blocks {})'
m = space_pat.match(text, i)
if m is not None:
return [(len(m.group()), None)]
cdo = cdo_pat.match(text, i)
if cdo is not None:
state.parse = State.IN_COMMENT_NORMAL
state.parse = IN_COMMENT_NORMAL
return [(len(cdo.group()), formats['comment'])]
if text[i] == '"':
state.parse = State.IN_DQS
state.parse = IN_DQS
return [(1, formats['string'])]
if text[i] == "'":
state.parse = State.IN_SQS
state.parse = IN_SQS
return [(1, formats['string'])]
if text[i] == '{':
state.parse = State.IN_CONTENT
state.parse = IN_CONTENT
state.blocks += 1
return [(1, formats['bracket'])]
for token, fmt, name in sheet_tokens:
@ -162,24 +186,24 @@ def normal(state, text, i, formats):
return [(len(text) - i, formats['unknown-normal'])]
def content(state, text, i, formats):
def content(state, text, i, formats, user_data):
' Inside content blocks '
m = space_pat.match(text, i)
if m is not None:
return [(len(m.group()), None)]
cdo = cdo_pat.match(text, i)
if cdo is not None:
state.parse = State.IN_COMMENT_CONTENT
state.parse = IN_COMMENT_CONTENT
return [(len(cdo.group()), formats['comment'])]
if text[i] == '"':
state.parse = State.IN_DQS
state.parse = IN_DQS
return [(1, formats['string'])]
if text[i] == "'":
state.parse = State.IN_SQS
state.parse = IN_SQS
return [(1, formats['string'])]
if text[i] == '}':
state.blocks -= 1
state.parse = State.NORMAL if state.blocks < 1 else State.IN_CONTENT
state.parse = NORMAL if state.blocks < 1 else IN_CONTENT
return [(1, formats['bracket'])]
if text[i] == '{':
state.blocks += 1
@ -191,34 +215,34 @@ def content(state, text, i, formats):
return [(len(text) - i, formats['unknown-normal'])]
def comment(state, text, i, formats):
def comment(state, text, i, formats, user_data):
' Inside a comment '
pos = text.find('*/', i)
if pos == -1:
return [(len(text), formats['comment'])]
state.parse = State.NORMAL if state.parse == State.IN_COMMENT_NORMAL else State.IN_CONTENT
state.parse = NORMAL if state.parse == IN_COMMENT_NORMAL else IN_CONTENT
return [(pos - i + 2, formats['comment'])]
def in_string(state, text, i, formats):
def in_string(state, text, i, formats, user_data):
'Inside a string'
q = '"' if state.parse == State.IN_DQS else "'"
q = '"' if state.parse == IN_DQS else "'"
pos = text.find(q, i)
if pos == -1:
if text[-1] == '\\':
# Multi-line string
return [(len(text) - i, formats['string'])]
state.parse = (State.NORMAL if state.blocks < 1 else State.IN_CONTENT)
state.parse = (NORMAL if state.blocks < 1 else IN_CONTENT)
return [(len(text) - i, formats['unterminated-string'])]
state.parse = (State.NORMAL if state.blocks < 1 else State.IN_CONTENT)
state.parse = (NORMAL if state.blocks < 1 else IN_CONTENT)
return [(pos - i + len(q), formats['string'])]
state_map = {
State.NORMAL:normal,
State.IN_COMMENT_NORMAL: comment,
State.IN_COMMENT_CONTENT: comment,
State.IN_SQS: in_string,
State.IN_DQS: in_string,
State.IN_CONTENT: content,
NORMAL:normal,
IN_COMMENT_NORMAL: comment,
IN_COMMENT_CONTENT: comment,
IN_SQS: in_string,
IN_DQS: in_string,
IN_CONTENT: content,
}
def create_formats(highlighter):
@ -252,9 +276,8 @@ class CSSHighlighter(SyntaxHighlighter):
state_map = state_map
create_formats_func = create_formats
user_data_factory = CSSUserData
def create_state(self, num):
return State(max(0, num))
if __name__ == '__main__':
from calibre.gui2.tweak_book.editor.widget import launch_editor


@ -15,7 +15,8 @@ from PyQt4.Qt import QFont, QTextBlockUserData
from calibre.ebooks.oeb.polish.spell import html_spell_tags, xml_spell_tags
from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat
from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_loop
from calibre.gui2.tweak_book.editor.syntax.css import create_formats as create_css_formats, state_map as css_state_map, State as CSSState
from calibre.gui2.tweak_book.editor.syntax.css import (
create_formats as create_css_formats, state_map as css_state_map, CSSState, CSSUserData)
from html5lib.constants import cdataElements, rcdataElements
@ -51,41 +52,33 @@ Attr = namedtuple('Attr', 'offset type data')
class Tag(object):
__slots__ = ('name', 'bold', 'italic', 'lang', 'hash')
__slots__ = ('name', 'bold', 'italic', 'lang')
def __init__(self, name, bold=None, italic=None):
self.name = name
self.bold = name in bold_tags if bold is None else bold
self.italic = name in italic_tags if italic is None else italic
self.lang = None
self.hash = 0
def __hash__(self):
return self.hash
def __eq__(self, other):
return self.name == getattr(other, 'name', None) and self.lang == getattr(other, 'lang', False)
def copy(self):
ans = Tag(self.name, self.bold, self.italic)
ans.lang, ans.hash = self.lang, self.hash
ans.lang = self.lang
return ans
def update_hash(self):
self.hash = hash((self.name, self.lang))
class State(object):
__slots__ = ('tag_being_defined', 'tags', 'is_bold', 'is_italic',
'current_lang', 'parse', 'get_user_data', 'set_user_data',
'css_formats', 'stack', 'sub_parser_state', 'default_lang',
'attribute_name',)
__slots__ = (
'tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang',
'parse', 'css_formats', 'sub_parser_state', 'default_lang', 'attribute_name',)
def __init__(self):
self.tags = []
self.is_bold = self.is_italic = False
self.tag_being_defined = self.current_lang = self.get_user_data = self.set_user_data = \
self.css_formats = self.stack = self.sub_parser_state = self.default_lang = self.attribute_name = None
self.tag_being_defined = self.current_lang = self.css_formats = \
self.sub_parser_state = self.default_lang = self.attribute_name = None
self.parse = NORMAL
def copy(self):
@ -95,17 +88,10 @@ class State(object):
self.tags = [x.copy() for x in self.tags]
if self.tag_being_defined is not None:
self.tag_being_defined = self.tag_being_defined.copy()
if self.sub_parser_state is not None:
ans.sub_parser_state = self.sub_parser_state.copy()
return ans
@property
def value(self):
if self.tag_being_defined is not None:
self.tag_being_defined.update_hash()
return self.stack.index_for(self)
def __hash__(self):
return hash((self.parse, self.sub_parser_state, self.tag_being_defined, self.attribute_name, tuple(self.tags)))
def __eq__(self, other):
return (
self.parse == getattr(other, 'parse', -1) and
@ -115,6 +101,9 @@ class State(object):
self.tags == getattr(other, 'tags', None)
)
def __ne__(self, other):
return not self.__eq__(other)
def open_tag(self, name):
self.tag_being_defined = Tag(name)
@ -128,7 +117,7 @@ class State(object):
return # No matching open tag found, ignore the closing tag
# Remove all tags up to the matching open tag
self.tags = self.tags[:-len(removed_tags)]
self.sub_parser_state = 0
self.sub_parser_state = None
# Check if we should still be bold or italic
if self.is_bold:
self.is_bold = False
@ -154,71 +143,41 @@ class State(object):
if self.tag_being_defined is None:
return
t, self.tag_being_defined = self.tag_being_defined, None
t.update_hash()
self.tags.append(t)
self.is_bold = self.is_bold or t.bold
self.is_italic = self.is_italic or t.italic
self.current_lang = t.lang or self.current_lang
if t.name in cdata_tags:
self.parse = CSS if t.name == 'style' else CDATA
self.sub_parser_state = 0
self.sub_parser_state = None
def __repr__(self):
return '<State %s is_bold=%s is_italic=%s current_lang=%s>' % (
'->'.join(x.name for x in self.tags), self.is_bold, self.is_italic, self.current_lang)
__str__ = __repr__
class Stack(object):
''' Maintain an efficient bi-directional mapping between states and index
numbers. Ensures that if state1 == state2 then their corresponding index
numbers are the same and vice versa. This is needed so that the state number
passed to Qt does not change unless the underlying state has actually
changed. '''
def __init__(self):
self.index_map = []
self.state_map = {}
def index_for(self, state):
ans = self.state_map.get(state, None)
if ans is None:
self.state_map[state] = ans = len(self.index_map)
self.index_map.append(state)
return ans
def state_for(self, index):
try:
return self.index_map[index]
except IndexError:
return None
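
To make the docstring above concrete, here is a small, hypothetical usage of such a bi-directional mapping; the states are written as plain tuples only for brevity (the real highlighter stores State instances):

stack = Stack()
a = ('IN_OPENING_TAG', ('p',))
b = ('IN_OPENING_TAG', ('p',))  # equal to a, so it must reuse a's index
assert stack.index_for(a) == stack.index_for(b) == 0
assert stack.index_for(('NORMAL', ())) == 1   # a new state gets a fresh index
assert stack.state_for(1) == ('NORMAL', ())   # and can be recovered from it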
class HTMLUserData(QTextBlockUserData):
def __init__(self):
QTextBlockUserData.__init__(self)
self.tags = []
self.attributes = []
self.state = State()
self.css_user_data = None
def add_tag_data(state, tag):
ud = q = state.get_user_data()
if ud is None:
ud = HTMLUserData()
ud.tags.append(tag)
if q is None:
state.set_user_data(ud)
def clear(self, state=None):
self.tags, self.attributes = [], []
self.state = State() if state is None else state
def add_tag_data(user_data, tag):
user_data.tags.append(tag)
ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object()
def add_attr_data(state, data_type, data, offset):
ud = q = state.get_user_data()
if ud is None:
ud = HTMLUserData()
ud.attributes.append(Attr(offset, data_type, data))
if q is None:
state.set_user_data(ud)
def add_attr_data(user_data, data_type, data, offset):
user_data.attributes.append(Attr(offset, data_type, data))
def css(state, text, i, formats):
def css(state, text, i, formats, user_data):
' Inside a <style> tag '
pat = cdata_close_pats['style']
m = pat.search(text, i)
@ -227,18 +186,18 @@ def css(state, text, i, formats):
else:
css_text = text[i:m.start()]
ans = []
css_state = CSSState(state.sub_parser_state)
for j, num, fmt in run_loop(css_state, css_state_map, state.css_formats, css_text):
css_user_data = user_data.css_user_data = user_data.css_user_data or CSSUserData()
state.sub_parser_state = css_user_data.state = state.sub_parser_state or CSSState()
for j, num, fmt in run_loop(css_user_data, css_state_map, formats['css_sub_formats'], css_text):
ans.append((num, fmt))
state.sub_parser_state = css_state.value
if m is not None:
state.sub_parser_state = 0
state.sub_parser_state = None
state.parse = IN_CLOSING_TAG
add_tag_data(state, TagStart(m.start(), 'style', '', True, True))
add_tag_data(user_data, TagStart(m.start(), '', 'style', True, True))
ans.extend([(2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])])
return ans
def cdata(state, text, i, formats):
def cdata(state, text, i, formats, user_data):
'CDATA inside tags like <title> or <style>'
name = state.tags[-1].name
pat = cdata_close_pats[name]
@ -248,7 +207,7 @@ def cdata(state, text, i, formats):
return [(len(text) - i, fmt)]
state.parse = IN_CLOSING_TAG
num = m.start() - i
add_tag_data(state, TagStart(m.start(), name, '', True, True))
add_tag_data(user_data, TagStart(m.start(), '', name, True, True))
return [(num, fmt), (2, formats['end_tag']), (len(m.group()) - 2, formats['tag_name'])]
def mark_nbsp(state, text, nbsp_format):
@ -268,7 +227,7 @@ def mark_nbsp(state, text, nbsp_format):
ans = [(len(text), fmt)]
return ans
def normal(state, text, i, formats):
def normal(state, text, i, formats, user_data):
' The normal state in between tags '
ch = text[i]
if ch == '<':
@ -288,18 +247,23 @@ def normal(state, text, i, formats):
if m is None:
return [(1, formats['<'])]
name = m.group()
closing = name.startswith('/')
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
ans = [(2 if closing else 1, formats['end_tag' if closing else 'tag'])]
tname = m.group()
closing = tname.startswith('/')
if closing:
name = name[1:]
prefix, name = name.partition(':')[0::2]
if prefix and name:
tname = tname[1:]
if ':' in tname:
prefix, name = tname.split(':', 1)
else:
prefix, name = '', tname
if prefix and not name:
return [(len(m.group()) + 1, formats['only-prefix'])]
ans = [(2 if closing else 1, formats['end_tag' if closing else 'tag'])]
if prefix:
ans.append((len(prefix)+1, formats['nsprefix']))
ans.append((len(name or prefix), formats['tag_name']))
add_tag_data(state, TagStart(i, prefix, name, closing, True))
(state.close_tag if closing else state.open_tag)(name or prefix)
ans.append((len(name), formats['tag_name']))
state.parse = IN_CLOSING_TAG if closing else IN_OPENING_TAG
add_tag_data(user_data, TagStart(i, prefix, name, closing, True))
(state.close_tag if closing else state.open_tag)(name)
return ans
if ch == '&':
@ -314,7 +278,7 @@ def normal(state, text, i, formats):
t = normal_pat.search(text, i).group()
return mark_nbsp(state, t, formats['nbsp'])
def opening_tag(cdata_tags, state, text, i, formats):
def opening_tag(cdata_tags, state, text, i, formats, user_data):
'An opening tag, like <a>'
ch = text[i]
if ch in space_chars:
@ -325,24 +289,26 @@ def opening_tag(cdata_tags, state, text, i, formats):
return [(1, formats['/'])]
state.parse = NORMAL
l = len(m.group())
add_tag_data(state, TagEnd(i + l - 1, True, False))
add_tag_data(user_data, TagEnd(i + l - 1, True, False))
return [(l, formats['tag'])]
if ch == '>':
state.finish_opening_tag(cdata_tags)
add_tag_data(state, TagEnd(i, False, False))
add_tag_data(user_data, TagEnd(i, False, False))
return [(1, formats['tag'])]
m = attribute_name_pat.match(text, i)
if m is None:
return [(1, formats['?'])]
state.parse = ATTRIBUTE_NAME
attrname = state.attribute_name = m.group()
add_attr_data(state, ATTR_NAME, attrname, m.start())
add_attr_data(user_data, ATTR_NAME, attrname, m.start())
prefix, name = attrname.partition(':')[0::2]
if not prefix and not name:
return [(len(attrname), formats['?'])]
if prefix and name:
return [(len(prefix) + 1, formats['nsprefix']), (len(name), formats['attr'])]
return [(len(prefix), formats['attr'])]
def attribute_name(state, text, i, formats):
def attribute_name(state, text, i, formats, user_data):
' After attribute name '
ch = text[i]
if ch in space_chars:
@ -354,7 +320,7 @@ def attribute_name(state, text, i, formats):
state.parse = IN_OPENING_TAG
return [(0, None)]
def attribute_value(state, text, i, formats):
def attribute_value(state, text, i, formats, user_data):
' After attribute = '
ch = text[i]
if ch in space_chars:
@ -368,20 +334,20 @@ def attribute_value(state, text, i, formats):
return [(1, formats['no-attr-value'])]
return [(len(m.group()), formats['string'])]
def quoted_val(state, text, i, formats):
def quoted_val(state, text, i, formats, user_data):
' A quoted attribute value '
quote = '"' if state.parse is DQ_VAL else "'"
add_attr_data(state, ATTR_VALUE, ATTR_START, i)
add_attr_data(user_data, ATTR_VALUE, ATTR_START, i)
pos = text.find(quote, i)
if pos == -1:
num = len(text) - i
else:
num = pos - i + 1
state.parse = IN_OPENING_TAG
add_attr_data(state, ATTR_VALUE, ATTR_END, i + num)
add_attr_data(user_data, ATTR_VALUE, ATTR_END, i + num)
return [(num, formats['string'])]
def closing_tag(state, text, i, formats):
def closing_tag(state, text, i, formats, user_data):
' A closing tag like </a> '
ch = text[i]
if ch in space_chars:
@ -394,10 +360,10 @@ def closing_tag(state, text, i, formats):
ans = [(1, formats['end_tag'])]
if num > 1:
ans.insert(0, (num - 1, formats['bad-closing']))
add_tag_data(state, TagEnd(pos, False, False))
add_tag_data(user_data, TagEnd(pos, False, False))
return ans
def in_comment(state, text, i, formats):
def in_comment(state, text, i, formats, user_data):
' Comment, processing instruction or doctype '
end = {IN_COMMENT:'-->', IN_PI:'?>'}.get(state.parse, '>')
pos = text.find(end, i)
@ -428,7 +394,7 @@ for x in (SQ_VAL, DQ_VAL):
xml_state_map = state_map.copy()
xml_state_map[IN_OPENING_TAG] = partial(opening_tag, set())
def create_formats(highlighter):
def create_formats(highlighter, add_css=True):
t = highlighter.theme
formats = {
'tag': t['Function'],
@ -452,11 +418,14 @@ def create_formats(highlighter):
'?': _('Unknown character'),
'bad-closing': _('A closing tag must contain only the tag name and nothing else'),
'no-attr-value': _('Expecting an attribute value'),
'only-prefix': _('A tag name cannot end with a colon'),
}.iteritems():
f = formats[name] = SyntaxTextCharFormat(formats['error'])
f.setToolTip(msg)
f = formats['title'] = SyntaxTextCharFormat()
f.setFontWeight(QFont.Bold)
if add_css:
formats['css_sub_formats'] = create_css_formats(highlighter)
return formats
@ -465,18 +434,7 @@ class HTMLHighlighter(SyntaxHighlighter):
state_map = state_map
create_formats_func = create_formats
spell_attributes = ('alt', 'title')
def create_formats(self):
super(HTMLHighlighter, self).create_formats()
self.default_state = State()
self.default_state.css_formats = create_css_formats(self)
self.default_state.stack = Stack()
def create_state(self, val):
if val < 0:
return self.default_state.copy()
ans = self.default_state.stack.state_for(val) or self.default_state
return ans.copy()
user_data_factory = HTMLUserData
def tag_ok_for_spell(self, name):
return name not in html_spell_tags
@ -486,6 +444,9 @@ class XMLHighlighter(HTMLHighlighter):
state_map = xml_state_map
spell_attributes = ('opf:file-as',)
def create_formats_func(self):
return create_formats(self, add_css=False)
def tag_ok_for_spell(self, name):
return name in xml_spell_tags
@ -507,7 +468,7 @@ if __name__ == '__main__':
</style>
<style type="text/css">p.small { font-size: x-small; color:gray }</style>
</head id="invalid attribute on closing tag">
<body>
<body><p:
<!-- The start of the actual body text -->
<h1>A heading that should appear in bold, with an <i>italic</i> word</h1>
<p>Some text with inline formatting, that is syntax highlighted. A <b>bold</b> word, and an <em>italic</em> word. \

View File

@ -14,7 +14,7 @@ import regex
from PyQt4.Qt import (
QPlainTextEdit, QFontDatabase, QToolTip, QPalette, QFont, QKeySequence,
QTextEdit, QTextFormat, QWidget, QSize, QPainter, Qt, QRect, pyqtSlot,
QApplication, QMimeData, QColor, QColorDialog)
QApplication, QMimeData, QColor, QColorDialog, QTimer)
from calibre import prepare_string_for_xml, xml_entity_to_unicode
from calibre.gui2.tweak_book import tprefs, TOP
@ -135,7 +135,9 @@ class TextEdit(PlainTextEdit):
self.smarts = NullSmarts(self)
self.current_cursor_line = None
self.current_search_mark = None
self.highlighter = SyntaxHighlighter(self)
self.smarts_highlight_timer = t = QTimer()
t.setInterval(750), t.setSingleShot(True), t.timeout.connect(self.update_extra_selections)
self.highlighter = SyntaxHighlighter()
self.line_number_area = LineNumbers(self)
self.apply_settings()
self.setMouseTracking(True)
@ -206,9 +208,9 @@ class TextEdit(PlainTextEdit):
def load_text(self, text, syntax='html', process_template=False):
self.syntax = syntax
self.highlighter = get_highlighter(syntax)(self)
self.highlighter = get_highlighter(syntax)()
self.highlighter.apply_theme(self.theme)
self.highlighter.setDocument(self.document())
self.highlighter.set_document(self.document())
sclass = {'html':HTMLSmarts, 'xml':HTMLSmarts}.get(syntax, None)
if sclass is not None:
self.smarts = sclass(self)
@ -252,13 +254,16 @@ class TextEdit(PlainTextEdit):
self.setTextCursor(c)
self.ensureCursorVisible()
def update_extra_selections(self):
def update_extra_selections(self, instant=True):
sel = []
if self.current_cursor_line is not None:
sel.append(self.current_cursor_line)
if self.current_search_mark is not None:
sel.append(self.current_search_mark)
if instant:
sel.extend(self.smarts.get_extra_selections(self))
else:
self.smarts_highlight_timer.start()
self.setExtraSelections(sel)
# Search and replace {{{
@ -456,7 +461,7 @@ class TextEdit(PlainTextEdit):
sel.cursor = self.textCursor()
sel.cursor.clearSelection()
self.current_cursor_line = sel
self.update_extra_selections()
self.update_extra_selections(instant=False)
# Update the cursor line's line number in the line number area
try:
self.line_number_area.update(0, self.last_current_lnum[0], self.line_number_area.width(), self.last_current_lnum[1])
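
The single-shot timer created in __init__ and the instant flag above act as a debounce: every cursor move merely restarts the 750 ms timer, so the comparatively expensive smarts highlighting only runs once the cursor has been still for that long. A minimal sketch of the same pattern, with made-up names rather than calibre's API:

from PyQt4.Qt import QObject, QTimer

class Debouncer(QObject):
    'Run callback only after trigger() has been quiet for `interval` ms'

    def __init__(self, callback, interval=750, parent=None):
        QObject.__init__(self, parent)
        self.timer = t = QTimer(self)
        t.setInterval(interval), t.setSingleShot(True)
        t.timeout.connect(callback)

    def trigger(self):
        # Restarting a running single-shot timer postpones its timeout
        self.timer.start()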

View File

@ -393,9 +393,6 @@ class FileList(QTreeWidget):
for name, linear in container.spine_names:
processed[name] = create_item(name, linear=linear)
all_files = list(container.manifest_type_map.iteritems())
all_files.append((guess_type('a.opf'), [container.opf_name]))
for name in container.name_path_map:
if name in processed:
continue

View File

@ -84,6 +84,11 @@ class WhereBox(QComboBox):
<dd>Search only within the marked text in the currently opened file. You can mark text using the Search menu.</dd>
</dl>'''))
self.emphasize = emphasize
self.ofont = QFont(self.font())
if emphasize:
f = self.emph_font = QFont(self.ofont)
f.setBold(True), f.setItalic(True)
self.setFont(f)
@dynamic_property
def where(self):
@ -94,16 +99,16 @@ class WhereBox(QComboBox):
self.setCurrentIndex({v:k for k, v in wm.iteritems()}[val])
return property(fget=fget, fset=fset)
def paintEvent(self, ev):
def showPopup(self):
# We do it like this so that the popup uses a normal font
if self.emphasize:
ofont = self.font()
f = QFont(ofont)
f.setBold(True), f.setItalic(True)
self.setFont(f)
QComboBox.paintEvent(self, ev)
self.setFont(self.ofont)
QComboBox.showPopup(self)
def hidePopup(self):
if self.emphasize:
self.setFont(ofont)
self.setFont(self.emph_font)
QComboBox.hidePopup(self)
class DirectionBox(QComboBox):
@ -766,7 +771,7 @@ class SavedSearches(Dialog):
def err():
error_dialog(self, _('Invalid data'), _(
'The file %s does not contain valid saved searches') % path, show=True)
if not isinstance(obj, dict) or not 'version' in obj or not 'searches' in obj or obj['version'] not in (1,):
if not isinstance(obj, dict) or 'version' not in obj or 'searches' not in obj or obj['version'] not in (1,):
return err()
searches = []
for item in obj['searches']:

View File

@ -16,7 +16,7 @@ from PyQt4.Qt import (
QStackedLayout, QLabel, QVBoxLayout, QWidget, QPushButton, QIcon, QMenu,
QDialogButtonBox, QLineEdit, QDialog, QToolButton, QFormLayout, QHBoxLayout,
pyqtSignal, QAbstractTableModel, QModelIndex, QTimer, QTableView, QCheckBox,
QComboBox, QListWidget, QListWidgetItem, QInputDialog, QPlainTextEdit)
QComboBox, QListWidget, QListWidgetItem, QInputDialog, QPlainTextEdit, QKeySequence)
from calibre.constants import __appname__, plugins
from calibre.ebooks.oeb.polish.spell import replace_word, get_all_words, merge_locations
@ -122,7 +122,40 @@ class AddDictionary(QDialog): # {{{
QDialog.accept(self)
# }}}
class ManageUserDictionaries(Dialog): # {{{
# User Dictionaries {{{
class UserWordList(QListWidget):
def __init__(self, parent=None):
QListWidget.__init__(self, parent)
def contextMenuEvent(self, ev):
m = QMenu(self)
m.addAction(_('Copy selected words to clipboard'), self.copy_to_clipboard)
m.addAction(_('Select all words'), self.select_all)
m.exec_(ev.globalPos())
def select_all(self):
for item in (self.item(i) for i in xrange(self.count())):
item.setSelected(True)
def copy_to_clipboard(self):
words = []
for item in (self.item(i) for i in xrange(self.count())):
if item.isSelected():
words.append(item.data(Qt.UserRole).toPyObject()[0])
if words:
QApplication.clipboard().setText('\n'.join(words))
def keyPressEvent(self, ev):
if ev == QKeySequence.Copy:
self.copy_to_clipboard()
ev.accept()
return
return QListWidget.keyPressEvent(self, ev)
class ManageUserDictionaries(Dialog):
def __init__(self, parent=None):
self.dictionaries_changed = False
@ -162,7 +195,7 @@ class ManageUserDictionaries(Dialog): # {{{
l.addWidget(a)
self.la = la = QLabel(_('Words in this dictionary:'))
l.addWidget(la)
self.words = w = QListWidget(self)
self.words = w = UserWordList(self)
w.setSelectionMode(w.ExtendedSelection)
l.addWidget(w)
self.add_word_button = b = QPushButton(_('&Add word'), self)
@ -586,6 +619,11 @@ class WordsModel(QAbstractTableModel):
elif role == Qt.InitialSortOrderRole:
return Qt.DescendingOrder if section == 1 else Qt.AscendingOrder
def misspelled_text(self, w):
if self.spell_map[w]:
return _('Ignored') if dictionaries.is_word_ignored(*w) else ''
return ''
def data(self, index, role=Qt.DisplayRole):
try:
word, locale = self.items[index.row()]
@ -604,7 +642,7 @@ class WordsModel(QAbstractTableModel):
pl = '%s (%s)' % (pl, countrycode)
return pl
if col == 3:
return '' if self.spell_map[(word, locale)] else ''
return self.misspelled_text((word, locale))
if role == Qt.TextAlignmentRole:
return Qt.AlignVCenter | (Qt.AlignLeft if index.column() == 0 else Qt.AlignHCenter)
@ -635,7 +673,7 @@ class WordsModel(QAbstractTableModel):
locale = w[1]
return (calibre_langcode_to_name(locale.langcode), locale.countrycode)
else:
key = self.spell_map.get
key = self.misspelled_text
return key
def do_sort(self):
@ -762,6 +800,10 @@ class WordsView(QTableView):
self.verticalHeader().close()
def keyPressEvent(self, ev):
if ev == QKeySequence.Copy:
self.copy_to_clipboard()
ev.accept()
return
ret = QTableView.keyPressEvent(self, ev)
if ev.key() in (Qt.Key_PageUp, Qt.Key_PageDown, Qt.Key_Up, Qt.Key_Down):
idx = self.currentIndex()
@ -794,9 +836,19 @@ class WordsView(QTableView):
a.setMenu(am)
for dic in sorted(dictionaries.active_user_dictionaries, key=lambda x:sort_key(x.name)):
am.addAction(dic.name, partial(self.add_all.emit, dic.name))
m.addSeparator()
m.addAction(_('Copy selected words to clipboard'), self.copy_to_clipboard)
m.exec_(ev.globalPos())
def copy_to_clipboard(self):
rows = {i.row() for i in self.selectedIndexes()}
words = {self.model().word_for_row(r) for r in rows}
words.discard(None)
words = sorted({w[0] for w in words}, key=sort_key)
if words:
QApplication.clipboard().setText('\n'.join(words))
class SpellCheck(Dialog):
work_finished = pyqtSignal(object, object)
@ -1202,5 +1254,5 @@ def find_next(word, locations, current_editor, current_editor_name,
if __name__ == '__main__':
app = QApplication([])
dictionaries.initialize()
SpellCheck.test()
ManageUserDictionaries.test()
del app

View File

@ -18,9 +18,10 @@ from PyQt4.Qt import (
QListView, QTextDocument, QSize, QComboBox, QFrame, QCursor)
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.polish.utils import lead_text
from calibre.gui2 import error_dialog, choose_files, choose_save_file, NONE, info_dialog
from calibre.gui2.tweak_book import tprefs
from calibre.utils.icu import primary_sort_key, sort_key
from calibre.utils.icu import primary_sort_key, sort_key, primary_contains
from calibre.utils.matcher import get_char, Matcher
from calibre.gui2.complete2 import EditWithComplete
@ -568,11 +569,12 @@ class NamesModel(QAbstractListModel):
if text == name:
return i
def create_filterable_names_list(names, filter_text=None, parent=None):
def create_filterable_names_list(names, filter_text=None, parent=None, model=NamesModel):
nl = QListView(parent)
nl.m = m = NamesModel(names, parent=nl)
nl.m = m = model(names, parent=nl)
m.filtered.connect(lambda all_items: nl.scrollTo(m.index(0)))
nl.setModel(m)
if model is NamesModel:
nl.d = NamesDelegate(nl)
nl.setItemDelegate(nl.d)
f = QLineEdit(parent)
@ -583,6 +585,39 @@ def create_filterable_names_list(names, filter_text=None, parent=None):
# }}}
# Insert Link {{{
class AnchorsModel(QAbstractListModel):
filtered = pyqtSignal(object)
def __init__(self, names, parent=None):
self.items = []
self.names = []
QAbstractListModel.__init__(self, parent=parent)
def rowCount(self, parent=ROOT):
return len(self.items)
def data(self, index, role):
if role == Qt.UserRole:
return self.items[index.row()]
if role == Qt.DisplayRole:
return '\n'.join(self.items[index.row()])
if role == Qt.ToolTipRole:
text, frag = self.items[index.row()]
return _('Anchor: %s\nLeading text: %s') % (frag, text)
def set_names(self, names):
self.names = names
self.filter('')
def filter(self, query):
query = unicode(query or '')
self.beginResetModel()
self.items = [x for x in self.names if primary_contains(query, x[0]) or primary_contains(query, x[1])]
self.endResetModel()
self.filtered.emit(not bool(query))
class InsertLink(Dialog):
def __init__(self, container, source_name, initial_text=None, parent=None):
@ -612,7 +647,8 @@ class InsertLink(Dialog):
fnl.addWidget(la), fnl.addWidget(f), fnl.addWidget(fn)
h.addLayout(fnl), h.setStretch(0, 2)
fn, f = create_filterable_names_list([], filter_text=_('Filter locations'), parent=self)
fn, f = create_filterable_names_list([], filter_text=_('Filter locations'), parent=self, model=AnchorsModel)
fn.setSpacing(5)
self.anchor_names, self.anchor_names_filter = fn, f
fn.selectionModel().selectionChanged.connect(self.update_target)
fn.doubleClicked.connect(self.accept, type=Qt.QueuedConnection)
@ -648,8 +684,12 @@ class InsertLink(Dialog):
if name not in self.anchor_cache:
from calibre.ebooks.oeb.base import XHTML_NS
root = self.container.parsed(name)
self.anchor_cache[name] = sorted(
(set(root.xpath('//*/@id')) | set(root.xpath('//h:a/@name', namespaces={'h':XHTML_NS}))) - {''}, key=primary_sort_key)
ac = self.anchor_cache[name] = []
for item in set(root.xpath('//*[@id]')) | set(root.xpath('//h:a[@name]', namespaces={'h':XHTML_NS})):
frag = item.get('id', None) or item.get('name')
text = lead_text(item, num_words=4)
ac.append((text, frag))
ac.sort(key=lambda (text, frag): primary_sort_key(text))
self.anchor_names.model().set_names(self.anchor_cache[name])
self.update_target()
@ -665,7 +705,7 @@ class InsertLink(Dialog):
frag = ''
rows = list(self.anchor_names.selectionModel().selectedRows())
if rows:
anchor = self.anchor_names.model().data(rows[0], Qt.UserRole).toPyObject()[0]
anchor = self.anchor_names.model().data(rows[0], Qt.UserRole)[1]
if anchor:
frag = '#' + anchor
href += frag
@ -886,4 +926,4 @@ class InsertSemantics(Dialog):
if __name__ == '__main__':
app = QApplication([])
InsertTag.test()
InsertLink.test()

View File

@ -1017,7 +1017,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.iterator = EbookIterator(pathtoebook)
self.open_progress_indicator(_('Loading ebook...'))
worker = Worker(target=partial(self.iterator.__enter__,
extract_embedded_fonts_for_qt=True))
extract_embedded_fonts_for_qt=True, view_kepub=True))
worker.start()
while worker.isAlive():
worker.join(0.1)

View File

@ -929,7 +929,7 @@ Name=E-book Viewer
GenericName=Viewer for E-books
Comment=Viewer for E-books in all the major formats
TryExec=ebook-viewer
Exec=ebook-viewer %f
Exec=ebook-viewer --detach %f
Icon=calibre-viewer
Categories=Graphics;Viewer;
'''
@ -942,7 +942,7 @@ Name=Edit E-book
GenericName=Edit E-books
Comment=Edit e-books in various formats
TryExec=ebook-edit
Exec=ebook-edit %f
Exec=ebook-edit --detach %f
Icon=calibre-ebook-edit
Categories=Office;
'''
@ -955,7 +955,7 @@ Name=calibre
GenericName=E-book library management
Comment=E-book library management: Convert, view, share, catalogue all your e-books
TryExec=calibre
Exec=calibre %F
Exec=calibre --detach %F
Icon=calibre-gui
Categories=Office;
'''

View File

@ -101,7 +101,10 @@ def custom_dictionaries(reread=False):
return _custom
default_en_locale = 'en-US'
ul = parse_lang_code(get_system_locale() or 'en-US')
try:
ul = parse_lang_code(get_system_locale() or 'en-US')
except ValueError:
ul = None
if ul is not None and ul.langcode == 'eng' and ul.countrycode in 'GB BS BZ GH IE IN JM NZ TT'.split():
default_en_locale = 'en-' + ul.countrycode
default_preferred_locales = {'eng':default_en_locale, 'deu':'de-DE', 'spa':'es-ES', 'fra':'fr-FR'}

View File

@ -68,6 +68,18 @@ def import_from_libreoffice_source_tree(source_path):
if want_locales:
raise Exception('Failed to find dictionaries for some wanted locales: %s' % want_locales)
def fill_country_code(x):
return {'lt':'lt_LT'}.get(x, x)
def uniq(vals, kmap=lambda x:x):
''' Remove all duplicates from vals, while preserving order. kmap must be a
callable that returns a hashable value for every item in vals '''
vals = vals or ()
lvals = (kmap(x) for x in vals)
seen = set()
seen_add = seen.add
return tuple(x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k))
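
For illustration, two hypothetical calls to the helper above (the locale strings are made up):

print(uniq(['lt_LT', 'lt_LT', 'en_GB']))  # -> ('lt_LT', 'en_GB')
print(uniq(['En-US', 'en_us', 'de_DE'], kmap=lambda x: x.lower().replace('-', '_')))
# -> ('En-US', 'de_DE'): the first spelling of each normalised key wins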
def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
from calibre.spell.dictionary import parse_lang_code
dest_dir = dest_dir or os.path.join(config_dir, 'dictionaries')
@ -81,10 +93,10 @@ def import_from_oxt(source_path, name, dest_dir=None, prefix='dic-'):
for (dic, aff), locales in parse_xcu(zf.open(xcu).read(), origin='').iteritems():
dic, aff = dic.lstrip('/'), aff.lstrip('/')
d = tempfile.mkdtemp(prefix=prefix, dir=dest_dir)
locales = [x for x in locales if parse_lang_code(x).countrycode]
locales = uniq([x for x in map(fill_country_code, locales) if parse_lang_code(x).countrycode])
if not locales:
continue
metadata = [name] + locales
metadata = [name] + list(locales)
with open(os.path.join(d, 'locales'), 'wb') as f:
f.write(('\n'.join(metadata)).encode('utf-8'))
with open(os.path.join(d, '%s.dic' % locales[0]), 'wb') as f:

View File

@ -459,3 +459,34 @@ def atomic_rename(oldpath, newpath):
time.sleep(1)
else:
os.rename(oldpath, newpath)
def remove_dir_if_empty(path, ignore_metadata_caches=False):
''' Remove a directory if it is empty or contains only the folder metadata
caches from different OSes. To delete the folder if it contains only
metadata caches, set ignore_metadata_caches to True.'''
try:
os.rmdir(path)
except OSError as e:
if e.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0:
# Some linux systems appear to raise an EPERM instead of an
# ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
if ignore_metadata_caches:
try:
found = False
for x in os.listdir(path):
if x.lower() in {'.ds_store', 'thumbs.db'}:
found = True
x = os.path.join(path, x)
if os.path.isdir(x):
import shutil
shutil.rmtree(x)
else:
os.remove(x)
except Exception: # We could get an error, if, for example, windows has locked Thumbs.db
found = False
if found:
remove_dir_if_empty(path)
return
raise
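
A hypothetical usage sketch for the helper above; the path is made up:

import os

book_dir = '/tmp/library/Author/Some Book (123)'  # hypothetical folder
if os.path.isdir(book_dir):
    # Delete the folder even if the OS left a Thumbs.db or .DS_Store behind
    remove_dir_if_empty(book_dir, ignore_metadata_caches=True)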

View File

@ -116,13 +116,12 @@ class TestICU(unittest.TestCase):
for group in [
('Šaa', 'Smith', 'Solženicyn', 'Štepánek'),
('01', '1'),
('1', '11', '13'),
]:
last = None
for x in group:
order, length = icu.numeric_collator().collation_order(x)
if last is not None:
self.ae(last, order)
self.ae(last, order, 'Order for %s not correct: %s != %s' % (x, last, order))
last = order
self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),

View File

@ -696,6 +696,10 @@ def educateQuotes(str):
""" % (close_class,), re.VERBOSE)
str = closing_double_quotes_regex.sub(r"""\1&#8221;""", str)
if str.endswith('-"'):
# A string that ends with -" is sometimes used for dialogue
str = str[:-1] + '&#8221;'
# Any remaining quotes should be opening ones.
str = re.sub(r'"', r"""&#8220;""", str)
@ -859,8 +863,8 @@ def _tokenize(str):
tokens = []
#depth = 6
#nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
# depth = 6
# nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
# match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
# (?: <\? .*? \?> ) | # directives
# %s # nested tags """ % (nested_tags,)

View File

@ -17,7 +17,8 @@ class UNRARError(Exception):
pass
class DevNull:
def write(self, x): pass
def write(self, x):
pass
class RARStream(object):
@ -184,15 +185,15 @@ def extract_member(stream, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I),
return h['filename'], et.getvalue()
def extract_first_alphabetically(stream):
names_ = [x for x in names(stream) if os.path.splitext(x)[1][1:].lower() in
{'png', 'jpg', 'jpeg', 'gif'}]
names_.sort()
names_ = sorted([x for x in names(stream) if os.path.splitext(x)[1][1:].lower() in
{'png', 'jpg', 'jpeg', 'gif'}])
return extract_member(stream, name=names_[0], match=None)
# Test normal RAR file {{{
def test_basic():
stream = BytesIO(b"Rar!\x1a\x07\x00\xcf\x90s\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x14\xe7z\x00\x80#\x00\x17\x00\x00\x00\r\x00\x00\x00\x03\xc2\xb3\x96o\x00\x00\x00\x00\x1d3\x03\x00\x00\x00\x00\x00CMT\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe6h\x04\x17\xff\xcd\x0f\xffk9b\x11]^\x80\xd3dt \x90+\x00\x14\x00\x00\x00\x08\x00\x00\x00\x03\xf1\x84\x93\\\xb9]yA\x1d3\t\x00\xa4\x81\x00\x001\\sub-one\x00\xc0\x0c\x00\x8f\xec\x89\xfe.JM\x86\x82\x0c_\xfd\xfd\xd7\x11\x1a\xef@\x9eHt \x80'\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x03\x9f\xa8\x17\xf8\xaf]yA\x1d3\x07\x00\xa4\x81\x00\x00one.txt\x00\x08\xbf\x08\xae\xf3\xca\x87\xfeo\xfe\xd2n\x80-Ht \x82:\x00\x18\x00\x00\x00\x10\x00\x00\x00\x03\xa86\x81\xdf\xf9fyA\x1d3\x1a\x00\xa4\x81\x00\x00\xe8\xaf\xb6\xe6\xaf\x94\xe5\xb1\x81.txt\x00\x8bh\xf6\xd4kA\\.\x00txt\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe2l\x91\x189\xff\xdf\xfe\xc2\xd3:g\x9a\x19F=cYt \x928\x00\x11\x00\x00\x00\x08\x00\x00\x00\x03\x7f\xd6\xb6\x7f\xeafyA\x1d3\x16\x00\xa4\x81\x00\x00F\xc3\xbc\xc3\x9fe.txt\x00\x01\x00F\xfc\xdfe\x00.txt\x00\xc0<D\xfe\xc8\xef\xbc\xd1\x04I?\xfd\xff\xdbF)]\xe8\xb9\xe1t \x90/\x00\x13\x00\x00\x00\x08\x00\x00\x00\x03\x1a$\x932\xc2]yA\x1d3\r\x00\xa4\x81\x00\x002\\sub-two.txt\x00\xc0\x10\x00S\xec\xcb\x7f\x8b\xa5(\x0b\x01\xcb\xef\xdf\xf6t\x89\x97z\x0eft \x90)\x00\r\x00\x00\x00\r\x00\x00\x00\x03c\x89K\xd3\xc8fyA\x140\x07\x00\xff\xa1\x00\x00symlink\x00\xc02/sub-two.txt\xeb\x86t\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xb9]yA\x140\x01\x00\xedA\x00\x001\x00\xc0\xe0Dt\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xc2]yA\x140\x01\x00\xedA\x00\x002\x00\xc0u\xa1t \x80,\x00\r\x00\x00\x00\r\x00\x00\x00\x03T\xea\x04\xca\xe6\x84yA\x140\x0c\x00\xa4\x81\x00\x00uncompresseduncompressed\n\xda\x10t \x900\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x035K.\xa6\x18\x85yA\x1d5\x0e\x00\xa4\x81\x00\x00max-compressed\x00\xc0\x00\x08\xbf\x08\xae\xf2\xcc\x01s\xf8\xff\xec\x96\xe8\xc4={\x00@\x07\x00")
stream = BytesIO(
b"Rar!\x1a\x07\x00\xcf\x90s\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x14\xe7z\x00\x80#\x00\x17\x00\x00\x00\r\x00\x00\x00\x03\xc2\xb3\x96o\x00\x00\x00\x00\x1d3\x03\x00\x00\x00\x00\x00CMT\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe6h\x04\x17\xff\xcd\x0f\xffk9b\x11]^\x80\xd3dt \x90+\x00\x14\x00\x00\x00\x08\x00\x00\x00\x03\xf1\x84\x93\\\xb9]yA\x1d3\t\x00\xa4\x81\x00\x001\\sub-one\x00\xc0\x0c\x00\x8f\xec\x89\xfe.JM\x86\x82\x0c_\xfd\xfd\xd7\x11\x1a\xef@\x9eHt \x80'\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x03\x9f\xa8\x17\xf8\xaf]yA\x1d3\x07\x00\xa4\x81\x00\x00one.txt\x00\x08\xbf\x08\xae\xf3\xca\x87\xfeo\xfe\xd2n\x80-Ht \x82:\x00\x18\x00\x00\x00\x10\x00\x00\x00\x03\xa86\x81\xdf\xf9fyA\x1d3\x1a\x00\xa4\x81\x00\x00\xe8\xaf\xb6\xe6\xaf\x94\xe5\xb1\x81.txt\x00\x8bh\xf6\xd4kA\\.\x00txt\x0c\x00\x8b\xec\x8e\xef\x14\xf6\xe2l\x91\x189\xff\xdf\xfe\xc2\xd3:g\x9a\x19F=cYt \x928\x00\x11\x00\x00\x00\x08\x00\x00\x00\x03\x7f\xd6\xb6\x7f\xeafyA\x1d3\x16\x00\xa4\x81\x00\x00F\xc3\xbc\xc3\x9fe.txt\x00\x01\x00F\xfc\xdfe\x00.txt\x00\xc0<D\xfe\xc8\xef\xbc\xd1\x04I?\xfd\xff\xdbF)]\xe8\xb9\xe1t \x90/\x00\x13\x00\x00\x00\x08\x00\x00\x00\x03\x1a$\x932\xc2]yA\x1d3\r\x00\xa4\x81\x00\x002\\sub-two.txt\x00\xc0\x10\x00S\xec\xcb\x7f\x8b\xa5(\x0b\x01\xcb\xef\xdf\xf6t\x89\x97z\x0eft \x90)\x00\r\x00\x00\x00\r\x00\x00\x00\x03c\x89K\xd3\xc8fyA\x140\x07\x00\xff\xa1\x00\x00symlink\x00\xc02/sub-two.txt\xeb\x86t\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xb9]yA\x140\x01\x00\xedA\x00\x001\x00\xc0\xe0Dt\xe0\x90#\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xc2]yA\x140\x01\x00\xedA\x00\x002\x00\xc0u\xa1t \x80,\x00\r\x00\x00\x00\r\x00\x00\x00\x03T\xea\x04\xca\xe6\x84yA\x140\x0c\x00\xa4\x81\x00\x00uncompresseduncompressed\n\xda\x10t \x900\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x035K.\xa6\x18\x85yA\x1d5\x0e\x00\xa4\x81\x00\x00max-compressed\x00\xc0\x00\x08\xbf\x08\xae\xf2\xcc\x01s\xf8\xff\xec\x96\xe8\xc4={\x00@\x07\x00") # noqa
tdata = {u'1': b'',
u'1/sub-one': b'sub-one\n',
u'2': b'',
@ -234,20 +235,23 @@ def test_basic():
from calibre.utils.mem import memory
import gc
del f
for i in xrange(3): gc.collect()
num = 300
for i in xrange(3):
gc.collect()
def get_mem_use(num):
start = memory()
s = SaveStream(stream)
for i in xrange(num):
with s:
f = RARFile(stream)
f.test()
del f
del s
for i in xrange(3): gc.collect()
used = memory() - start
if used > 1 and not isosx:
raise ValueError('Leaked %s MB for %d calls'%(used, num))
del f, s
for i in xrange(3):
gc.collect()
return memory() - start
(get_mem_use(20))
a, b = get_mem_use(10), get_mem_use(110)
if not isosx and abs(b - a) > 1:
raise ValueError('Leaked %s MB for %d calls'%(b - a, 100))
# }}}
def test_rar(path):

View File

@ -542,7 +542,7 @@ class RecursiveFetcher(object):
_fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
_fname = ascii_filename(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml'
_fname = os.path.splitext(_fname)[0][:120] + '.xhtml'
res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res)
self.filemap[nurl] = res