Merge from trunk

Charles Haley 2012-11-07 18:16:15 +01:00
commit 9868fffb02
29 changed files with 1287 additions and 578 deletions

View File

@ -35,3 +35,7 @@ nbproject/
.settings/
*.DS_Store
calibre_plugins/
recipes/.git
recipes/.gitignore
recipes/README
recipes/katalog_egazeciarz.recipe

View File

@ -327,9 +327,8 @@ You can browse your |app| collection on your Android device by using the
calibre content server, which makes your collection available over the net.
First perform the following steps in |app|
* Set the :guilabel:`Preferred Output Format` in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
* Set the output profile to Tablet (this will work for phones as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
* Convert the books you want to read on your device to EPUB format by selecting them and clicking the Convert button.
* Set the :guilabel:`Preferred Output Format` in |app| to EPUB for normal Android devices or MOBI for Kindles (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
* Convert the books you want to read on your device to EPUB/MOBI format by selecting them and clicking the Convert button.
* Turn on the Content Server in |app|'s preferences and leave |app| running.
Now on your Android device, open the browser and browse to
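
The address to enter is the content server's listen address. With |app|'s default settings that is port 8080 on the computer running |app|, for example (the IP address here is only an illustration):

    http://192.168.1.2:8080
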

View File

@ -2,7 +2,9 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
class FocusRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'intromatyk <intromatyk@gmail.com>'
language = 'pl'
@ -46,15 +48,14 @@ class FocusRecipe(BasicNewsRecipe):
.fot{font-size: x-small; color: #666666;}
'''
feeds = [
('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
('Nauka', 'http://www.focus.pl/nauka/rss/'),
('Historia', 'http://www.focus.pl/historia/rss/'),
('Cywilizacja', 'http://www.focus.pl/cywilizacja/rss/'),
('Sport', 'http://www.focus.pl/sport/rss/'),
('Technika', 'http://www.focus.pl/technika/rss/'),
('Przyroda', 'http://www.focus.pl/przyroda/rss/'),
('Technologie', 'http://www.focus.pl/gadzety/rss/')
]
def skip_ad_pages(self, soup):

View File

@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Gazeta_Wyborcza(BasicNewsRecipe):
title = u'Gazeta Wyborcza'
__author__ = 'fenuks'
__author__ = 'fenuks, Artur Stachecki'
language = 'pl'
description = 'news from gazeta.pl'
category = 'newspaper'
@ -15,27 +16,14 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = dict(id=['gazeta_article', 'article'])
remove_tags_after = dict(id='gazeta_article_share')
remove_tags = [dict(attrs={'class':['artReadMore', 'gazeta_article_related_new', 'txt_upl']}), dict(id=['gazeta_article_likes', 'gazeta_article_tools', 'rel', 'gazeta_article_tags', 'gazeta_article_share', 'gazeta_article_brand', 'gazeta_article_miniatures'])]
remove_tags_before = dict(id='k0')
remove_tags_after = dict(id='banP4')
remove_tags = [dict(name='div', attrs={'class':'rel_box'}), dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}), dict(name='div', attrs={'id':'footer'})]
feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
(u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
(u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
(u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
(u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
(u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
(u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
#(u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
(u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
(u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
(u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
(u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
(u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
(u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
(u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
(u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss')
(u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
]
def skip_ad_pages(self, soup):
@ -44,7 +32,6 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
new_soup = self.index_to_soup(tag['href'], raw=True)
return new_soup
def append_page(self, soup, appendtag):
loop = False
tag = soup.find('div', attrs={'id': 'Str'})
@ -85,14 +72,30 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
rem.extract()
def preprocess_html(self, soup):
if soup.find(attrs={'class': 'piano_btn_1'}):
return None
else:
self.append_page(soup, soup.body)
if soup.find(id='container_gal'):
self.gallery_article(soup.body)
return soup
def print_version(self, url):
if 'http://wyborcza.biz/biznes/' not in url:
return url
if url.count('rss.feedsportal.com'):
u = url.find('wyborcza0Bpl')
u = 'http://www.wyborcza.pl/' + url[u + 11:]
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace('0E', '-')
u = u.replace('0H', ',')
u = u.replace('0I', '_')
u = u.replace('0B', '.')
u = u.replace('/1,', '/2029020,')
u = u.replace('/story01.htm', '')
print(u)
return u
elif 'http://wyborcza.pl/1' in url:
return url.replace('http://wyborcza.pl/1', 'http://wyborcza.pl/2029020')
else:
return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
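
The chain of replace() calls in print_version is a decoder for feedsportal redirect URLs, which escape path characters as two-letter codes (0C for '/', 0E for '-', 0H for ',', 0I for '_', 0B for '.', with 'A' as padding). The same mapping as a standalone sketch, mirroring the method above (the sample URL shape is hypothetical):

    def decode_feedsportal(url):
        # slice off the escaped tail and translate the escapes back
        u = 'http://www.wyborcza.pl/' + url[url.find('wyborcza0Bpl') + 11:]
        for code, char in (('0C', '/'), ('A', ''), ('0E', '-'),
                           ('0H', ','), ('0I', '_'), ('0B', '.')):
            u = u.replace(code, char)
        return u.replace('/1,', '/2029020,').replace('/story01.htm', '')
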

View File

@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class FocusRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'intromatyk <intromatyk@gmail.com>'
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
version = 1

View File

@ -34,16 +34,20 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'story'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleLeftBox'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'socialNewTools'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'socialTools'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxTop'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'recommendations'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'editorPicks'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks editorPicksFirst'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightText'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightButton'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxBottom'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'addRecommendation'}))
remove_tags.append(dict(name = 'h3', attrs = {'id' : 'tags'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@ -67,3 +71,4 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
return start + '/' + index + '?print=tak'

View File

@ -1,34 +1,55 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image
class tvn24(BasicNewsRecipe):
title = u'TVN24'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'fenuks'
__author__ = 'fenuks, Artur Stachecki'
description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
category = 'news'
language = 'pl'
#masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
cover_url= 'http://www.userlogos.org/files/logos/Struna/TVN24.jpg'
extra_css = 'ul {list-style:none;} \
li {list-style:none; float: left; margin: 0 0.15em;} \
h2 {font-size: medium} \
.date60m {float: left; margin: 0 10px 0 5px;}'
masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags=[dict(name='h1', attrs={'class':['size30 mt10 pb10', 'size38 mt10 pb15']}), dict(name='figure', attrs={'class':'articleMainPhoto articleMainPhotoWide'}), dict(name='article', attrs={'class':['mb20', 'mb20 textArticleDefault']}), dict(name='ul', attrs={'class':'newsItem'})]
remove_tags = [dict(name='aside', attrs={'class':['innerArticleModule onRight cols externalContent', 'innerArticleModule center']}), dict(name='div', attrs={'class':['thumbsGallery', 'articleTools', 'article right rd7', 'heading', 'quizContent']}), dict(name='a', attrs={'class':'watchMaterial text'}), dict(name='section', attrs={'class':['quiz toCenter', 'quiz toRight']})]
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
keep_only_tags=[
# dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
dict(name='div', attrs={'class':'mainContainer'}),
# dict(name='p'),
# dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
]
remove_tags=[
dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
]
remove_tags_after=[dict(name='li', attrs={'class':'share'})]
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
tag = soup.find(name='ul', attrs={'class':'newsItem'})
if tag:
tag.name='div'
tag.li.name='div'
# strip links, keeping only their text
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
def postprocess_html(self, soup, first):
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -3,6 +3,8 @@
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
__copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
__copyright__ = 'Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'
from calibre.web.feeds.news import BasicNewsRecipe
import re
@ -11,7 +13,7 @@ class Wprost(BasicNewsRecipe):
EDITION = 0
FIND_LAST_FULL_ISSUE = True
EXCLUDE_LOCKED = True
ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png'
title = u'Wprost'
__author__ = 'matek09'
@ -20,6 +22,7 @@ class Wprost(BasicNewsRecipe):
no_stylesheets = True
language = 'pl'
remove_javascript = True
recursions = 0
remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
@ -35,13 +38,15 @@ class Wprost(BasicNewsRecipe):
(re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
(re.compile(r'\<table .*?\>'), lambda match: ''),
(re.compile(r'\<tr>'), lambda match: ''),
(re.compile(r'\<td .*?\>'), lambda match: '')]
(re.compile(r'\<td .*?\>'), lambda match: ''),
(re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
extra_css = '''
.div-header {font-size: x-small; font-weight: bold}
'''
@ -59,27 +64,26 @@ class Wprost(BasicNewsRecipe):
a = 0
if self.FIND_LAST_FULL_ISSUE:
ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
else:
a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
a = soup.find('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
self.EDITION = a['href'].replace('/tygodnik/?I=', '')
self.EDITION_SHORT = a['href'].replace('/tygodnik/?I=15', '')
self.cover_url = a.img['src']
def parse_index(self):
self.find_last_issue()
soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
feeds = []
for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
for main_block in soup.findAll(attrs={'id': 'content-main-column-element-content'}):
articles = list(self.find_articles(main_block))
if len(articles) > 0:
section = self.tag_to_string(main_block)
section = self.tag_to_string(main_block.find('h3'))
feeds.append((section, articles))
return feeds
def find_articles(self, main_block):
for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
for a in main_block.findAll('a'):
if a.name in "td":
break
if self.EXCLUDE_LOCKED & self.is_blocked(a):
@ -91,3 +95,4 @@ class Wprost(BasicNewsRecipe):
'description' : ''
}

View File

@ -901,6 +901,9 @@ class Device(DeviceConfig, DevicePlugin):
for d in drives:
try:
winutil.eject_drive(bytes(d)[0])
except Exception as e:
try:
prints(as_unicode(e))
except:
pass

View File

@ -150,8 +150,15 @@ class EPUBInput(InputFormatPlugin):
from calibre import walk
from calibre.ebooks import DRMError
from calibre.ebooks.metadata.opf2 import OPF
try:
zf = ZipFile(stream)
zf.extractall(os.getcwdu())
except:
log.exception('EPUB appears to be invalid ZIP file, trying a'
' more forgiving ZIP parser')
from calibre.utils.localunzip import extractall
stream.seek(0)
extractall(stream)
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
opf = self.find_opf()
if opf is None:
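
The new try/except is a recovery path for EPUBs whose ZIP central directory is damaged: the strict ZipFile is tried first, and calibre.utils.localunzip, which re-scans the archive's local file headers, is the fallback. The same pattern in isolation (a sketch, extracting to the working directory exactly as EPUBInput does):

    import os
    from calibre.utils.zipfile import ZipFile
    from calibre.utils.localunzip import extractall

    def unpack_epub(stream):
        try:
            ZipFile(stream).extractall(os.getcwd())
        except Exception:
            # central directory unreadable; walk local headers instead
            stream.seek(0)
            extractall(stream)
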

View File

@ -10,6 +10,7 @@ from cStringIO import StringIO
from contextlib import closing
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.utils.localunzip import LocalZipFile
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
@ -105,6 +106,9 @@ class OCFReader(OCF):
class OCFZipReader(OCFReader):
def __init__(self, stream, mode='r', root=None):
if isinstance(stream, (LocalZipFile, ZipFile)):
self.archive = stream
else:
try:
self.archive = ZipFile(stream, mode=mode)
except BadZipfile:
@ -119,8 +123,18 @@ class OCFZipReader(OCFReader):
super(OCFZipReader, self).__init__()
def open(self, name, mode='r'):
if isinstance(self.archive, LocalZipFile):
return self.archive.open(name)
return StringIO(self.archive.read(name))
def get_zip_reader(stream, root=None):
try:
zf = ZipFile(stream, mode='r')
except:
stream.seek(0)
zf = LocalZipFile(stream)
return OCFZipReader(zf, root=root)
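
get_zip_reader centralizes that fallback for metadata access, and get_metadata()/set_metadata() below are switched over to it. A usage sketch (the file name is a placeholder):

    with open('book.epub', 'rb') as stream:
        reader = get_zip_reader(stream)
        mi = reader.opf.to_book_metadata()
        print(mi.title)
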
class OCFDirReader(OCFReader):
def __init__(self, path):
self.root = path
@ -184,7 +198,12 @@ def render_cover(opf, opf_path, zf, reader=None):
def get_cover(opf, opf_path, stream, reader=None):
raster_cover = opf.raster_cover
stream.seek(0)
try:
zf = ZipFile(stream)
except:
stream.seek(0)
zf = LocalZipFile(stream)
if raster_cover:
base = posixpath.dirname(opf_path)
cpath = posixpath.normpath(posixpath.join(base, raster_cover))
@ -207,7 +226,7 @@ def get_cover(opf, opf_path, stream, reader=None):
def get_metadata(stream, extract_cover=True):
""" Return metadata as a :class:`Metadata` object """
stream.seek(0)
reader = OCFZipReader(stream)
reader = get_zip_reader(stream)
mi = reader.opf.to_book_metadata()
if extract_cover:
try:
@ -232,7 +251,7 @@ def _write_new_cover(new_cdata, cpath):
def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
stream.seek(0)
reader = OCFZipReader(stream, root=os.getcwdu())
reader = get_zip_reader(stream, root=os.getcwdu())
raster_cover = reader.opf.raster_cover
mi = MetaInformation(mi)
new_cdata = None
@ -283,6 +302,10 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
reader.opf.timestamp = mi.timestamp
newopf = StringIO(reader.opf.render())
if isinstance(reader.archive, LocalZipFile):
reader.archive.safe_replace(reader.container[OPF.MIMETYPE], newopf,
extra_replacements=replacements)
else:
safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
extra_replacements=replacements)
try:

View File

@ -239,10 +239,11 @@ class PluginWidget(QWidget,Ui_Form):
def initialize(self, name, db):
'''
CheckBoxControls (c_type: check_box):
['generate_titles','generate_series','generate_genres',
'generate_recently_added','generate_descriptions','include_hr']
['cross_reference_authors',
'generate_titles','generate_series','generate_genres',
'generate_recently_added','generate_descriptions',
'include_hr']
ComboBoxControls (c_type: combo_box):
['exclude_source_field','header_note_source_field',
'merge_source_field']

View File

@ -305,7 +305,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
<string>Other options</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="2" column="1">
<item row="3" column="1">
<layout class="QHBoxLayout" name="merge_with_comments_hl">
<item>
<widget class="QComboBox" name="merge_source_field">
@ -372,7 +372,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
</item>
</layout>
</item>
<item row="2" column="0">
<item row="3" column="0">
<widget class="QLabel" name="label_9">
<property name="minimumSize">
<size>
@ -397,7 +397,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
</property>
</widget>
</item>
<item row="0" column="0">
<item row="1" column="0">
<widget class="QLabel" name="label_4">
<property name="minimumSize">
<size>
@ -413,7 +413,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
</property>
</widget>
</item>
<item row="0" column="1">
<item row="1" column="1">
<layout class="QHBoxLayout" name="replace_cover_hl">
<item>
<widget class="QRadioButton" name="generate_new_cover">
@ -447,7 +447,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
</item>
</layout>
</item>
<item row="1" column="0">
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>E&amp;xtra Description note:</string>
@ -460,7 +460,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
</property>
</widget>
</item>
<item row="1" column="1">
<item row="2" column="1">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QComboBox" name="header_note_source_field">
@ -561,6 +561,27 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
</item>
</layout>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Author cross-references:</string>
</property>
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
</widget>
</item>
<item row="0" column="1">
<layout class="QHBoxLayout" name="cross_references_hl">
<item>
<widget class="QCheckBox" name="cross_reference_authors">
<property name="text">
<string>For books with multiple authors, list each author separately</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>

View File

@ -6,102 +6,19 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonDEKindleStore(StorePlugin):
class AmazonDEKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'charhale0a-21'}
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
'&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
'&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
'&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
'&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
if author.startswith('von '):
author = author[4:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
yield s
def get_details(self, search_result, timeout):
drm_search_text = u'Gleichzeitige Verwendung von Geräten'
drm_free_text = u'Keine Einschränkung'
url = 'http://amazon.de/dp/'
br = browser()
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
drm_search_text + '")])'):
if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
drm_free_text + '") and contains(b, "' +
drm_search_text + '")])'):
search_result.drm = SearchResult.DRM_UNLOCKED
else:
search_result.drm = SearchResult.DRM_UNKNOWN
else:
search_result.drm = SearchResult.DRM_LOCKED
return True

View File

@ -6,78 +6,17 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonESKindleStore(StorePlugin):
class AmazonESKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'charhale09-21'}
store_link = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790' % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790' % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
'&linkCode=ur2&camp=3626&creative=24790')
search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('de '):
author = author[3:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
s.drm = SearchResult.DRM_UNKNOWN
yield s

View File

@ -6,79 +6,16 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from lxml import html
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonFRKindleStore(StorePlugin):
class AmazonFRKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'charhale-21'}
store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('de '):
author = author[3:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
s.drm = SearchResult.DRM_UNKNOWN
yield s

View File

@ -6,78 +6,17 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonITKindleStore(StorePlugin):
class AmazonITKindleStore(AmazonUKKindleStore):
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'httpcharles07-21'}
store_link = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322' % aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322' % aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=3370&creative=23322')
search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
cover_xpath = './/img[@class="productImage"]/@src'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
if author.startswith('di '):
author = author[3:]
counter -= 1
s = SearchResult()
s.cover_url = cover_url.strip()
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.formats = 'Kindle'
s.drm = SearchResult.DRM_UNKNOWN
yield s

View File

@ -6,8 +6,9 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
import re
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
@ -18,57 +19,80 @@ from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult
class AmazonUKKindleStore(StorePlugin):
aff_id = {'tag': 'calcharles-21'}
store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=19450')
store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
'linkCode=ur2&camp=1634&creative=6738')
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
'''
For comments on the implementation, please see amazon_plugin.py
'''
def open(self, parent=None, detail_item=None, external=False):
aff_id = {'tag': 'calcharles-21'}
store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
store_link = self.store_link % self.aff_id
if detail_item:
aff_id['asin'] = detail_item
store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
self.aff_id['asin'] = detail_item
store_link = self.store_link_details % self.aff_id
open_url(QUrl(store_link))
def search(self, query, max_results=10, timeout=60):
search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
# Apparently amazon Europe is responding in UTF-8 now
doc = html.fromstring(f.read())
doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
format_xpath = './/span[@class="format"]/text()'
data_xpath = '//div[contains(@class, "prod")]'
format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
asin_xpath = './/div[@class="image"]/a[1]'
cover_xpath = './/img[@class="productImage"]/@src'
title_xpath = './/h3[@class="newaps"]/a//text()'
author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
for data in doc.xpath(data_xpath):
if counter <= 0:
break
# Even though we are searching digital-text only Amazon will still
# put in results for non Kindle books (author pages). So we need
# put in results for non Kindle books (author pages). So we need
# to explicitly check if the item is a Kindle book and ignore it
# if it isn't.
format = ''.join(data.xpath(format_xpath))
if 'kindle' not in format.lower():
format_ = ''.join(data.xpath(format_xpath))
if 'kindle' not in format_.lower():
continue
# We must have an asin otherwise we can't easily reference the
# book later.
asin = ''.join(data.xpath("@name"))
asin_href = None
asin_a = data.xpath(asin_xpath)
if asin_a:
asin_href = asin_a[0].get('href', '')
m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
if m:
asin = m.group('asin')
else:
continue
else:
continue
cover_url = ''.join(data.xpath(cover_xpath))
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
title = ''.join(data.xpath(title_xpath))
author = ''.join(data.xpath(author_xpath))
try:
author = author.split('by ', 1)[1].split(" (")[0]
except:
pass
author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
if author.startswith('by '):
author = author[3:]
price = ''.join(data.xpath(price_xpath))
counter -= 1
@ -78,37 +102,10 @@ class AmazonUKKindleStore(StorePlugin):
s.author = author.strip()
s.price = price.strip()
s.detail_item = asin.strip()
s.drm = SearchResult.DRM_UNKNOWN
s.formats = 'Kindle'
yield s
def get_details(self, search_result, timeout):
# We might already have been called.
if search_result.drm:
return
url = 'http://amazon.co.uk/dp/'
drm_search_text = u'Simultaneous Device Usage'
drm_free_text = u'Unlimited'
br = browser()
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
if not search_result.author:
search_result.author = ''.join(idata.xpath('//div[@class="buying" and contains(., "Author")]/a/text()'))
is_kindle = idata.xpath('boolean(//div[@class="buying"]/h1/span/span[contains(text(), "Kindle Edition")])')
if is_kindle:
search_result.formats = 'Kindle'
if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
drm_search_text + '")])'):
if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
drm_free_text + '") and contains(b, "' +
drm_search_text + '")])'):
search_result.drm = SearchResult.DRM_UNLOCKED
else:
search_result.drm = SearchResult.DRM_UNKNOWN
else:
search_result.drm = SearchResult.DRM_LOCKED
return True
pass
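
The ASIN is no longer read from a @name attribute; it is parsed out of the result's /dp/ link with the regex above. A quick illustration (the URL is a made-up example):

    import re

    href = 'http://www.amazon.co.uk/Some-Title/dp/B001234567/ref=sr_1_1'
    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', href)
    if m:
        print(m.group('asin'))  # B001234567
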

View File

@ -25,7 +25,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
url_details = ('http://ad.zanox.com/ppc/?18817073C15644254T&ULP=[['
'http://www.libri.de/shop/action/productDetails?artiId={0}]]')
'http://www.ebook.de/shop/action/productDetails?artiId={0}]]')
if external or self.config.get('open_external', False):
if detail_item:
@ -41,33 +41,38 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = ('http://www.libri.de/shop/action/quickSearch?facetNodeId=6'
'&mainsearchSubmit=Los!&searchString=' + urllib2.quote(query))
url = ('http://www.ebook.de/de/pathSearch?nav=52122&searchString='
+ urllib2.quote(query))
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//div[contains(@class, "item")]'):
for data in doc.xpath('//div[contains(@class, "articlecontainer")]'):
if counter <= 0:
break
details = data.xpath('./div[@class="beschreibungContainer"]')
details = data.xpath('./div[@class="articleinfobox"]')
if not details:
continue
details = details[0]
id = ''.join(details.xpath('./div[@class="text"]/a/@name')).strip()
if not id:
id_ = ''.join(details.xpath('./a/@name')).strip()
if not id_:
continue
cover_url = ''.join(details.xpath('.//div[@class="coverImg"]/a/img/@src'))
title = ''.join(details.xpath('./div[@class="text"]/span[@class="titel"]/a/text()')).strip()
author = ''.join(details.xpath('./div[@class="text"]/span[@class="author"]/text()')).strip()
title = ''.join(details.xpath('.//a[@class="su1_c_l_titel"]/text()')).strip()
author = ''.join(details.xpath('.//div[@class="author"]/text()')).strip()
if author.startswith('von'):
author = author[4:]
pdf = details.xpath(
'boolean(.//span[@class="format" and contains(text(), "pdf")]/text())')
'boolean(.//span[@class="bindername" and contains(text(), "pdf")]/text())')
epub = details.xpath(
'boolean(.//span[@class="format" and contains(text(), "epub")]/text())')
'boolean(.//span[@class="bindername" and contains(text(), "epub")]/text())')
mobi = details.xpath(
'boolean(.//span[@class="format" and contains(text(), "mobipocket")]/text())')
'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')
cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src'))
price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip()
counter -= 1
@ -78,7 +83,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
s.author = author.strip()
s.price = price
s.drm = SearchResult.DRM_UNKNOWN
s.detail_item = id
s.detail_item = id_
formats = []
if epub:
formats.append('ePub')

View File

@ -41,6 +41,13 @@ class EPUB_MOBI(CatalogPlugin):
help = _('Title of generated catalog used as title in metadata.\n'
"Default: '%default'\n"
"Applies to: AZW3, ePub, MOBI output formats")),
Option('--cross-reference-authors',
default=False,
dest='cross_reference_authors',
action = 'store_true',
help=_("Create cross-references in Authors section for books with multiple authors.\n"
"Default: '%default'\n"
"Applies to: AZW3, ePub, MOBI output formats")),
Option('--debug-pipeline',
default=None,
dest='debug_pipeline',
@ -58,7 +65,6 @@ class EPUB_MOBI(CatalogPlugin):
help=_("Regex describing tags to exclude as genres.\n"
"Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
"Applies to: AZW3, ePub, MOBI output formats")),
Option('--exclusion-rules',
default="(('Catalogs','Tags','Catalog'),)",
dest='exclusion_rules',
@ -72,7 +78,6 @@ class EPUB_MOBI(CatalogPlugin):
"When multiple rules are defined, all rules will be applied.\n"
"Default: \n" + '"' + '%default' + '"' + "\n"
"Applies to AZW3, ePub, MOBI output formats")),
Option('--generate-authors',
default=False,
dest='generate_authors',
@ -318,8 +323,8 @@ class EPUB_MOBI(CatalogPlugin):
build_log.append(" opts:")
for key in keys:
if key in ['catalog_title','author_clip','connected_kindle','creator',
'description_clip','exclude_book_marker','exclude_genre',
'exclude_tags','exclusion_rules', 'fmt',
'cross_reference_authors','description_clip','exclude_book_marker',
'exclude_genre','exclude_tags','exclusion_rules', 'fmt',
'header_note_source_field','merge_comments_rule',
'output_profile','prefix_rules','read_book_marker',
'search_text','sort_by','sort_descriptions_by_author','sync',
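
The new flag travels the same path as the existing catalog options, so it should also be reachable from the command line; a plausible invocation (the output path is an example):

    calibredb catalog /tmp/catalog.epub --cross-reference-authors
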

View File

@ -14,11 +14,12 @@ from calibre.customize.conversion import DummyReporter
from calibre.customize.ui import output_profiles
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.metadata import author_to_author_sort
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, is_date_undefined, now as nowf
from calibre.utils.filenames import ascii_text
from calibre.utils.filenames import ascii_text, shorten_components_to
from calibre.utils.icu import capitalize, collation_order, sort_key
from calibre.utils.magick.draw import thumbnail
from calibre.utils.zipfile import ZipFile
@ -109,6 +110,7 @@ class CatalogBuilder(object):
self.stylesheet = stylesheet
self.cache_dir = os.path.join(config_dir, 'caches', 'catalog')
self.catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
self.content_dir = os.path.join(self.catalog_path, "content")
self.excluded_tags = self.get_excluded_tags()
self.generate_for_kindle_azw3 = True if (_opts.fmt == 'azw3' and
_opts.output_profile and
@ -127,12 +129,13 @@ class CatalogBuilder(object):
self.books_by_title = None
self.books_by_title_no_series_prefix = None
self.books_to_catalog = None
self.content_dir = os.path.join(self.catalog_path, "content")
self.current_step = 0.0
self.error = []
self.generate_recently_read = False
self.genres = []
self.genre_tags_dict = None
self.genre_tags_dict = \
self.filter_db_tags(max_len = 245 - len("%s/Genre_.html" % self.content_dir)) \
if self.opts.generate_genres else None
self.html_filelist_1 = []
self.html_filelist_2 = []
self.merge_comments_rule = dict(zip(['field','position','hr'],
@ -505,7 +508,7 @@ class CatalogBuilder(object):
if not os.path.isdir(images_path):
os.makedirs(images_path)
def detect_author_sort_mismatches(self):
def detect_author_sort_mismatches(self, books_to_test):
""" Detect author_sort mismatches.
Sort by author, look for inconsistencies in author_sort among
@ -513,17 +516,18 @@ class CatalogBuilder(object):
annoyance for EPUB.
Inputs:
self.books_to_catalog (list): list of books to catalog
books_to_test (list): list of books to test, possibly unsorted
Output:
self.books_by_author (list): sorted by author
(none)
Exceptions:
AuthorSortMismatchException: author_sort mismatch detected
"""
self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
books_by_author = sorted(list(books_to_test), key=self._kf_books_by_author_sorter_author)
authors = [(record['author'], record['author_sort']) for record in books_by_author]
current_author = authors[0]
for (i,author) in enumerate(authors):
if author != current_author and i:
@ -701,6 +705,7 @@ class CatalogBuilder(object):
def fetch_books_by_author(self):
""" Generate a list of books sorted by author.
For books with multiple authors, relist the book under each additional author.
Sort the database by author. Report author_sort inconsistencies as warning when
building EPUB or MOBI, error when building MOBI. Collect a list of unique authors
to self.authors.
@ -720,25 +725,30 @@ class CatalogBuilder(object):
self.update_progress_full_step(_("Sorting database"))
self.detect_author_sort_mismatches()
books_by_author = list(self.books_to_catalog)
self.detect_author_sort_mismatches(books_by_author)
if self.opts.cross_reference_authors:
books_by_author = self.relist_multiple_authors(books_by_author)
#books_by_author = sorted(list(books_by_author), key=self._kf_books_by_author_sorter_author)
# Sort authors using sort_key to normalize accented letters
# Determine the longest author_sort length before sorting
asl = [i['author_sort'] for i in self.books_by_author]
asl = [i['author_sort'] for i in books_by_author]
las = max(asl, key=len)
self.books_by_author = sorted(self.books_to_catalog,
books_by_author = sorted(books_by_author,
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
if self.DEBUG and self.opts.verbose:
tl = [i['title'] for i in self.books_by_author]
tl = [i['title'] for i in books_by_author]
lt = max(tl, key=len)
fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
print(fs.format('','Title','Author','Series'))
for i in self.books_by_author:
for i in books_by_author:
print(fs.format('', i['title'],i['author_sort'],i['series']))
# Build the unique_authors set from existing data
authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
authors = [(record['author'], capitalize(record['author_sort'])) for record in books_by_author]
# authors[] contains a list of all book authors, with multiple entries for multiple books by author
# authors[]: (([0]:friendly [1]:sort))
@ -776,6 +786,7 @@ class CatalogBuilder(object):
author[2])).encode('utf-8'))
self.authors = unique_authors
self.books_by_author = books_by_author
return True
def fetch_books_by_title(self):
@ -863,15 +874,15 @@ class CatalogBuilder(object):
this_title['series_index'] = 0.0
this_title['title_sort'] = self.generate_sort_title(this_title['title'])
if 'authors' in record:
# from calibre.ebooks.metadata import authors_to_string
# return authors_to_string(self.authors)
if 'authors' in record:
this_title['authors'] = record['authors']
# Synthesize author attribution from authors list
if record['authors']:
this_title['author'] = " &amp; ".join(record['authors'])
else:
this_title['author'] = 'Unknown'
this_title['author'] = _('Unknown')
this_title['authors'] = [this_title['author']]
if 'author_sort' in record and record['author_sort'].strip():
this_title['author_sort'] = record['author_sort']
@ -1093,7 +1104,7 @@ class CatalogBuilder(object):
self.bookmarked_books = bookmarks
def filter_db_tags(self):
def filter_db_tags(self, max_len):
""" Remove excluded tags from data set, return normalized genre list.
Filter all db tags, removing excluded tags supplied in opts.
@ -1101,13 +1112,13 @@ class CatalogBuilder(object):
tags are flattened to alphanumeric ascii_text.
Args:
(none)
max_len: maximum length of normalized tag to fit within OS constraints
Return:
genre_tags_dict (dict): dict of filtered, normalized tags in data set
"""
def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
def _format_tag_list(tags, indent=1, line_break=70, header='Tag list'):
def _next_tag(sorted_tags):
for (i, tag) in enumerate(sorted_tags):
if i < len(tags) - 1:
@ -1126,6 +1137,31 @@ class CatalogBuilder(object):
out_str = ' ' * (indent + 1)
return ans + out_str
def _normalize_tag(tag, max_len):
""" Generate an XHTML-legal anchor string from tag.
Parse tag for non-ascii, convert to unicode name.
Args:
tag (str): tag name, possibly containing symbols
max_len (int): maximum length of tag
Return:
normalized (str): unicode names substituted for non-ascii chars,
clipped to max_len
"""
normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
if re.search('\W',normalized):
normalized = ''
for c in massaged:
if re.search('\W',c):
normalized += self.generate_unicode_name(c)
else:
normalized += c
shortened = shorten_components_to(max_len, [normalized])[0]
return shortened
# Entry point
normalized_tags = []
friendly_tags = []
@ -1144,7 +1180,7 @@ class CatalogBuilder(object):
if tag == ' ':
continue
normalized_tags.append(self.normalize_tag(tag))
normalized_tags.append(_normalize_tag(tag, max_len))
friendly_tags.append(tag)
genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
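
_normalize_tag is the old normalize_tag (removed further down) plus a length clamp: the anchor is clipped with shorten_components_to so the eventual Genre_<anchor>.html path fits within the 245-character budget computed in __init__. A simplified sketch of the normalization (non-ascii handling and shorten_components_to are stood in by a plain slice):

    import re

    def normalize_plain_ascii(tag, max_len):
        # lowercase, strip whitespace; clip to the path budget
        return re.sub(r'\s', '', tag.lower())[:max_len]

    print(normalize_plain_ascii('Science Fiction', 200))  # sciencefiction
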
@ -1941,8 +1977,6 @@ class CatalogBuilder(object):
self.update_progress_full_step(_("Genres HTML"))
self.genre_tags_dict = self.filter_db_tags()
# Extract books matching filtered_tags
genre_list = []
for friendly_tag in sorted(self.genre_tags_dict, key=sort_key):
@ -2024,10 +2058,11 @@ class CatalogBuilder(object):
books_by_current_author += 1
# Write the genre book list as an article
titles_spanned = self.generate_html_by_genre(genre, True if index==0 else False,
outfile = "%s/Genre_%s.html" % (self.content_dir, genre)
titles_spanned = self.generate_html_by_genre(genre,
True if index==0 else False,
genre_tag_set[genre],
"%s/Genre_%s.html" % (self.content_dir,
genre))
outfile)
tag_file = "content/Genre_%s.html" % genre
master_genre_list.append({'tag':genre,
@ -2549,7 +2584,7 @@ class CatalogBuilder(object):
for (i, tag) in enumerate(sorted(book.get('tags', []))):
aTag = Tag(_soup,'a')
if self.opts.generate_genres:
aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
aTag['href'] = "Genre_%s.html" % self.genre_tags_dict[tag]
aTag.insert(0,escape(NavigableString(tag)))
genresTag.insert(gtc, aTag)
gtc += 1
@ -4603,28 +4638,6 @@ class CatalogBuilder(object):
return merged
def normalize_tag(self, tag):
""" Generate an XHTML-legal anchor string from tag.
Parse tag for non-ascii, convert to unicode name.
Args:
tag (str): tag name, possibly containing symbols
Return:
normalized (str): unicode names substituted for non-ascii chars
"""
normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
if re.search('\W',normalized):
normalized = ''
for c in massaged:
if re.search('\W',c):
normalized += self.generate_unicode_name(c)
else:
normalized += c
return normalized
def process_exclusions(self, data_set):
""" Filter data_set based on exclusion_rules.
@ -4697,6 +4710,43 @@ class CatalogBuilder(object):
else:
return data_set
def relist_multiple_authors(self, books_by_author):
""" Create multiple entries for books with multiple authors
Given a list of books by author, scan list for books with multiple
authors. Add a cloned copy of the book per additional author.
Args:
books_by_author (list): book list possibly containing books
with multiple authors
Return:
(list): books_by_author with additional cloned entries for books with
multiple authors
"""
multiple_author_books = []
# Find the multiple author books
for book in books_by_author:
if len(book['authors']) > 1:
multiple_author_books.append(book)
for book in multiple_author_books:
cloned_authors = list(book['authors'])
for x, author in enumerate(book['authors']):
if x:
first_author = cloned_authors.pop(0)
cloned_authors.append(first_author)
new_book = deepcopy(book)
new_book['author'] = ' & '.join(cloned_authors)
new_book['authors'] = list(cloned_authors)
asl = [author_to_author_sort(auth) for auth in cloned_authors]
new_book['author_sort'] = ' & '.join(asl)
books_by_author.append(new_book)
return books_by_author
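
Each pass of the rotation moves the front author to the back, so every co-author leads exactly one clone. The rotation in isolation (author names are placeholders; the author_sort recomputation is omitted):

    from copy import deepcopy

    book = {'author': 'A & B', 'authors': ['A', 'B']}
    books = [book]
    cloned = list(book['authors'])
    for x, _ in enumerate(book['authors']):
        if x:
            cloned.append(cloned.pop(0))  # rotate front author to the back
            clone = deepcopy(book)
            clone['author'] = ' & '.join(cloned)
            clone['authors'] = list(cloned)
            books.append(clone)
    # books now holds 'A & B' and a clone attributed 'B & A'
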
def update_progress_full_step(self, description):
""" Update calibre's job status UI.

View File

@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, unpack
from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
class CFF(object):
def __init__(self, raw):
(self.major_version, self.minor_version, self.header_size,
self.offset_size) = unpack_from(b'>4B', raw)
if (self.major_version, self.minor_version) != (1, 0):
raise UnsupportedFont('The CFF table has unknown version: '
'(%d, %d)'%(self.major_version, self.minor_version))
offset = self.header_size
# Read Names Index
self.font_names = Index(raw, offset)
offset = self.font_names.pos
if len(self.font_names) > 1:
raise UnsupportedFont('CFF table has more than one font.')
# Read Top Dict
self.top_index = Index(raw, offset)
offset = self.top_index.pos
# Read strings
self.strings = Strings(raw, offset)
offset = self.strings.pos
print (self.strings[len(cff_standard_strings):]) # debug: any strings this font defines beyond the 391 standard ones
class Index(list):
def __init__(self, raw, offset):
list.__init__(self)
count = unpack_from(b'>H', raw, offset)[0]
offset += 2
self.pos = offset
if count > 0:
self.offset_size = unpack_from(b'>B', raw, offset)[0]
offset += 1
if self.offset_size == 3:
offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
for i in xrange(offset, offset + 3*(count+1), 3)]
else:
fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
offsets = unpack_from(fmt, raw, offset)
offset += self.offset_size * (count+1) - 1
for i in xrange(len(offsets)-1):
off, noff = offsets[i:i+2]
obj = raw[offset+off:offset+noff]
self.append(obj)
self.pos = offset + offsets[-1]
class Strings(Index):
def __init__(self, raw, offset):
super(Strings, self).__init__(raw, offset)
for x in reversed(cff_standard_strings):
self.insert(0, x)
class CFFTable(UnknownTable):
def decompile(self):
self.cff = CFF(self.raw)
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
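A minimal sketch of the INDEX layout the Index class above parses: a big-endian uint16 count, a one-byte offSize, count+1 one-based offsets of offSize bytes each, then the concatenated object data. Hand-built bytes for illustration:

from struct import pack

# INDEX holding two objects, b'ab' and b'c':
#   count = 2, offSize = 1, offsets = (1, 3, 4), data = b'abc'
raw = pack(b'>H', 2) + pack(b'>B', 1) + pack(b'>3B', 1, 3, 4) + b'abc'
index = Index(raw, 0)
# list(index) == [b'ab', b'c'] and index.pos == len(raw)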

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,201 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack
t1_operand_encoding = [None] * 256
t1_operand_encoding[0:32] = (32) * ["do_operator"]
t1_operand_encoding[32:247] = (247 - 32) * ["read_byte"]
t1_operand_encoding[247:251] = (251 - 247) * ["read_small_int1"]
t1_operand_encoding[251:255] = (255 - 251) * ["read_small_int2"]
t1_operand_encoding[255] = "read_long_int"
t2_operand_encoding = t1_operand_encoding[:]
t2_operand_encoding[28] = "read_short_int"
t2_operand_encoding[255] = "read_fixed_1616"
cff_dict_operand_encoding = t2_operand_encoding[:]
cff_dict_operand_encoding[29] = "read_long_int"
cff_dict_operand_encoding[30] = "read_real_number"
cff_dict_operand_encoding[255] = "reserved"
real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'.', 'E', 'E-', None, '-']
class SimpleConverter(object):
def read(self, parent, value):
return value
def write(self, parent, value):
return value
class TODO(SimpleConverter):
pass
class Reader(dict):
def read_byte(self, b0, data, index):
return b0 - 139, index
def read_small_int1(self, b0, data, index):
b1 = ord(data[index])
return (b0-247)*256 + b1 + 108, index+1
def read_small_int2(self, b0, data, index):
b1 = ord(data[index])
return -(b0-251)*256 - b1 - 108, index+1
def read_short_int(self, b0, data, index):
bin = data[index] + data[index+1]
value, = unpack(b">h", bin)
return value, index+2
def read_long_int(self, b0, data, index):
bin = data[index] + data[index+1] + data[index+2] + data[index+3]
value, = unpack(b">l", bin)
return value, index+4
def read_fixed_1616(self, b0, data, index):
bin = data[index] + data[index+1] + data[index+2] + data[index+3]
value, = unpack(b">l", bin)
return value / 65536.0, index+4
def read_real_number(self, b0, data, index):
number = ''
while True:
b = ord(data[index])
index = index + 1
nibble0 = (b & 0xf0) >> 4
nibble1 = b & 0x0f
if nibble0 == 0xf:
break
number = number + real_nibbles[nibble0]
if nibble1 == 0xf:
break
number = number + real_nibbles[nibble1]
return float(number), index
class Dict(Reader):
operand_encoding = cff_dict_operand_encoding
TABLE = []
def __init__(self):
Reader.__init__(self)
table = self.TABLE[:]
for i in xrange(len(table)):
op, name, arg, default, conv = table[i]
if conv is not None:
continue
if arg in ("delta", "array", 'number', 'SID'):
conv = SimpleConverter()
else:
raise Exception('Should not happen')
table[i] = op, name, arg, default, conv
self.operators = {op:(name, arg) for op, name, arg, default, conv in
table}
def decompile(self, strings, global_subrs, data):
self.strings = strings
self.global_subrs = global_subrs
self.stack = []
index = 0
while index < len(data):
b0 = ord(data[index])
index += 1
handler = getattr(self, self.operand_encoding[b0])
value, index = handler(b0, data, index)
if value is not None:
self.stack.append(value)
def do_operator(self, b0, data, index):
if b0 == 12:
op = (b0, ord(data[index]))
index += 1
else:
op = b0
operator, arg_type = self.operators[op]
self.handle_operator(operator, arg_type)
return None, index
def handle_operator(self, operator, arg_type):
if isinstance(arg_type, tuple):
value = ()
for i in xrange(len(arg_type)-1, -1, -1):
arg = arg_type[i]
arghandler = getattr(self, 'arg_' + arg)
value = (arghandler(operator),) + value
else:
arghandler = getattr(self, 'arg_' + arg_type)
value = arghandler(operator)
self[operator] = value
def arg_number(self, name):
return self.stack.pop()
def arg_SID(self, name):
return self.strings[self.stack.pop()]
def arg_array(self, name):
ans = self.stack[:]
del self.stack[:]
return ans
def arg_delta(self, name):
out = []
current = 0
for v in self.stack:
current = current + v
out.append(current)
del self.stack[:]
return out
class TopDict(Dict):
TABLE = [
#opcode name argument type default converter
((12, 30), 'ROS', ('SID','SID','number'), None, SimpleConverter()),
((12, 20), 'SyntheticBase', 'number', None, None),
(0, 'version', 'SID', None, None),
(1, 'Notice', 'SID', None, None),
((12, 0), 'Copyright', 'SID', None, None),
(2, 'FullName', 'SID', None, None),
((12, 38), 'FontName', 'SID', None, None),
(3, 'FamilyName', 'SID', None, None),
(4, 'Weight', 'SID', None, None),
((12, 1), 'isFixedPitch', 'number', 0, None),
((12, 2), 'ItalicAngle', 'number', 0, None),
((12, 3), 'UnderlinePosition', 'number', None, None),
((12, 4), 'UnderlineThickness', 'number', 50, None),
((12, 5), 'PaintType', 'number', 0, None),
((12, 6), 'CharstringType', 'number', 2, None),
((12, 7), 'FontMatrix', 'array', [0.001,0,0,0.001,0,0], None),
(13, 'UniqueID', 'number', None, None),
(5, 'FontBBox', 'array', [0,0,0,0], None),
((12, 8), 'StrokeWidth', 'number', 0, None),
(14, 'XUID', 'array', None, None),
((12, 21), 'PostScript', 'SID', None, None),
((12, 22), 'BaseFontName', 'SID', None, None),
((12, 23), 'BaseFontBlend', 'delta', None, None),
((12, 31), 'CIDFontVersion', 'number', 0, None),
((12, 32), 'CIDFontRevision', 'number', 0, None),
((12, 33), 'CIDFontType', 'number', 0, None),
((12, 34), 'CIDCount', 'number', 8720, None),
(15, 'charset', 'number', 0, TODO()),
((12, 35), 'UIDBase', 'number', None, None),
(16, 'Encoding', 'number', 0, TODO()),
(18, 'Private', ('number','number'), None, TODO()),
((12, 37), 'FDSelect', 'number', None, TODO()),
((12, 36), 'FDArray', 'number', None, TODO()),
(17, 'CharStrings', 'number', None, TODO()),
]
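As a sketch of the integer operand encodings implemented by the Reader methods above, a standalone decoder (integer forms only; the nibble-coded reals are handled by read_real_number):

from struct import unpack

def decode_int(data, index=0):
    # Dispatch on the lead byte b0, exactly as the encoding tables above do.
    b0 = ord(data[index]); index += 1
    if 32 <= b0 <= 246:                     # read_byte
        return b0 - 139, index
    if 247 <= b0 <= 250:                    # read_small_int1
        return (b0 - 247)*256 + ord(data[index]) + 108, index + 1
    if 251 <= b0 <= 254:                    # read_small_int2
        return -(b0 - 251)*256 - ord(data[index]) - 108, index + 1
    if b0 == 28:                            # read_short_int
        return unpack(b'>h', data[index:index+2])[0], index + 2
    if b0 == 29:                            # read_long_int in CFF DICTs
        return unpack(b'>l', data[index:index+4])[0], index + 4
    raise ValueError('Not an integer operand: %d' % b0)

# decode_int(b'\x8b') == (0, 1)
# decode_int(b'\xf7\x00') == (108, 2)
# decode_int(b'\x1c\xff\x00') == (-256, 3)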

View File

@ -0,0 +1,166 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, unpack
from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict
# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
class CFF(object):
def __init__(self, raw):
(self.major_version, self.minor_version, self.header_size,
self.offset_size) = unpack_from(b'>4B', raw)
if (self.major_version, self.minor_version) != (1, 0):
raise UnsupportedFont('The CFF table has unknown version: '
'(%d, %d)'%(self.major_version, self.minor_version))
offset = self.header_size
# Read Names Index
self.font_names = Index(raw, offset)
offset = self.font_names.pos
if len(self.font_names) > 1:
raise UnsupportedFont('CFF table has more than one font.')
# Read Top Dict
self.top_index = Index(raw, offset)
self.top_dict = TopDict()
offset = self.top_index.pos
# Read strings
self.strings = Strings(raw, offset)
offset = self.strings.pos
# Read global subroutines
self.global_subrs = GlobalSubrs(raw, offset)
offset = self.global_subrs.pos
# Decompile Top Dict
self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
import pprint
pprint.pprint(self.top_dict)
class Index(list):
def __init__(self, raw, offset, prepend=()):
list.__init__(self)
self.extend(prepend)
count = unpack_from(b'>H', raw, offset)[0]
offset += 2
self.pos = offset
if count > 0:
self.offset_size = unpack_from(b'>B', raw, offset)[0]
offset += 1
if self.offset_size == 3:
offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
for i in xrange(offset, offset + 3*(count+1), 3)]
else:
fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
offsets = unpack_from(fmt, raw, offset)
offset += self.offset_size * (count+1) - 1
for i in xrange(len(offsets)-1):
off, noff = offsets[i:i+2]
obj = raw[offset+off:offset+noff]
self.append(obj)
self.pos = offset + offsets[-1]
class Strings(Index):
def __init__(self, raw, offset):
super(Strings, self).__init__(raw, offset, prepend=cff_standard_strings)
class GlobalSubrs(Index):
pass
class CFFTable(UnknownTable):
def decompile(self):
self.cff = CFF(self.raw)
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
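Putting the pieces together, a usage sketch for the decompiler above (the Sfnt import path, the constructor signature and the font file name are assumptions):

from calibre.utils.fonts.sfnt.container import Sfnt  # assumed import path

with open('font.otf', 'rb') as f:
    sfnt = Sfnt(f.read())      # assumed: constructor takes the raw font data
cff_table = sfnt[b'CFF ']      # a CFFTable instance
cff_table.decompile()          # parses the header, the INDEXes and the Top DICT
# cff_table.cff.top_dict now maps operator names ('FontMatrix',
# 'CharStrings', ...) to their decompiled values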

View File

@ -21,6 +21,7 @@ from calibre.utils.fonts.sfnt.maxp import MaxpTable
from calibre.utils.fonts.sfnt.loca import LocaTable
from calibre.utils.fonts.sfnt.glyf import GlyfTable
from calibre.utils.fonts.sfnt.cmap import CmapTable
from calibre.utils.fonts.sfnt.cff.table import CFFTable
# OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm
@ -42,6 +43,7 @@ class Sfnt(object):
b'loca' : LocaTable,
b'glyf' : GlyfTable,
b'cmap' : CmapTable,
b'CFF ' : CFFTable,
}.get(table_tag, UnknownTable)(table)
def __getitem__(self, key):
@ -53,12 +55,24 @@ class Sfnt(object):
def __delitem__(self, key):
del self.tables[key]
def __iter__(self):
'''Iterate over the table tags in optimal order as per
http://partners.adobe.com/public/developer/opentype/index_recs.html'''
keys = list(self.tables.keys())
order = {x:i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
b'hmtx', b'LTSH', b'VDMX', b'hdmx', b'cmap', b'fpgm', b'prep',
b'cvt ', b'loca', b'glyf', b'CFF ', b'kern', b'name', b'post',
b'gasp', b'PCLT', b'DSIG'))}
keys.sort(key=lambda x:order.get(x, 1000))
for x in keys:
yield x
def pop(self, key, default=None):
return self.tables.pop(key, default)
def sizes(self):
ans = OrderedDict()
for tag in sorted(self.tables):
for tag in self:
ans[tag] = len(self[tag])
return ans
@ -82,7 +96,7 @@ class Sfnt(object):
table_data = []
offset = stream.tell() + ( calcsize(b'>4s3L') * num_tables )
sizes = OrderedDict()
for tag in sorted(self.tables):
for tag in self:
table = self.tables[tag]
raw = table()
table_len = len(raw)
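The effect of the new __iter__ above, sketched with a toy table set (tags absent from the recommended order sort to the end, after the known ones):

order = {x: i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
    b'hmtx', b'cmap', b'loca', b'glyf', b'CFF '))}
tags = [b'name', b'glyf', b'head', b'cmap']
tags.sort(key=lambda x: order.get(x, 1000))
# tags == [b'head', b'cmap', b'glyf', b'name']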

View File

@ -66,6 +66,11 @@ def subset_truetype(sfnt, character_map):
# }}}
def subset_postscript(sfnt, character_map):
cff = sfnt[b'CFF ']
cff.decompile()
raise Exception('TODO: Implement CFF subsetting')
def subset(raw, individual_chars, ranges=()):
chars = list(map(ord, individual_chars))
for r in ranges:
@ -91,6 +96,10 @@ def subset(raw, individual_chars, ranges=()):
subset_truetype(sfnt, character_map)
elif b'CFF ' in sfnt:
# PostScript Outlines
from calibre.utils.config_base import tweaks
if tweaks['subset_cff_table']:
subset_postscript(sfnt, character_map)
else:
raise UnsupportedFont('This font contains PostScript outlines, '
'subsetting not supported')
else:

View File

@ -0,0 +1,267 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Try to read invalid zip files with missing or damaged central directories.
These are apparently produced in large numbers by the fruitcakes over at B&N.
Tries to use only the local headers to extract data from the damaged zip file.
'''
import os, sys, zlib, shutil
from struct import calcsize, unpack, pack
from collections import namedtuple, OrderedDict
from tempfile import SpooledTemporaryFile
HEADER_SIG = 0x04034b50
HEADER_BYTE_SIG = pack(b'<L', HEADER_SIG)
local_header_fmt = b'<L5HL2L2H'
local_header_sz = calcsize(local_header_fmt)
ZIP_STORED, ZIP_DEFLATED = 0, 8
LocalHeader = namedtuple('LocalHeader',
'signature min_version flags compression_method mod_time mod_date '
'crc32 compressed_size uncompressed_size filename_length extra_length '
'filename extra')
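# Field layout of local_header_fmt (b'<L5HL2L2H', 30 bytes), for reference:
#   uint32 signature            uint16 min_version       uint16 flags
#   uint16 compression_method   uint16 mod_time          uint16 mod_date
#   uint32 crc32                uint32 compressed_size   uint32 uncompressed_size
#   uint16 filename_length      uint16 extra_length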
def decode_arcname(name):
if isinstance(name, bytes):
from calibre.ebooks.chardet import detect
try:
name = name.decode('utf-8')
except:
res = detect(name)
encoding = res['encoding']
try:
name = name.decode(encoding)
except:
name = name.decode('utf-8', 'replace')
return name
def find_local_header(f):
pos = f.tell()
raw = f.read(50*1024)
try:
f.seek(pos + raw.index(HEADER_BYTE_SIG))
except ValueError:
f.seek(pos)
return
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature == HEADER_SIG:
return header
f.seek(pos)
def read_local_file_header(f):
pos = f.tell()
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature != HEADER_SIG:
f.seek(pos)
header = find_local_header(f)
if header is None:
return
if header.min_version > 20:
raise ValueError('This ZIP file uses unsupported features')
if header.flags & 0b1:
raise ValueError('This ZIP file is encrypted')
if header.flags & (1 << 3):
raise ValueError('This ZIP file uses data descriptors. This is unsupported')
if header.flags & (1 << 13):
raise ValueError('This ZIP file uses masking, unsupported.')
if header.compression_method not in {ZIP_STORED, ZIP_DEFLATED}:
raise ValueError('This ZIP file uses an unsupported compression method')
fname = extra = None
if header.filename_length > 0:
fname = f.read(header.filename_length)
if len(fname) != header.filename_length:
return
try:
fname = fname.decode('ascii')
except UnicodeDecodeError:
if header.flags & (1 << 11):
try:
fname = fname.decode('utf-8')
except UnicodeDecodeError:
pass
fname = decode_arcname(fname).replace('\\', '/')
if header.extra_length > 0:
extra = f.read(header.extra_length)
if len(extra) != header.extra_length:
return
return LocalHeader(*(
header[:-2] + (fname, extra)
))
def read_compressed_data(f, header):
cdata = f.read(header.compressed_size)
return cdata
def copy_stored_file(src, size, dest):
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
if not raw:
raise ValueError('Premature end of file')
dest.write(raw)
read += len(raw)
def copy_compressed_file(src, size, dest):
d = zlib.decompressobj(-15)
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
read += len(raw)
dest.write(d.decompress(raw, 200*1024))
count = 0
while d.unconsumed_tail:
count += 1
dest.write(d.decompress(d.unconsumed_tail, 200*1024))
if count > 100:
raise ValueError('This ZIP file contains a ZIP bomb in %s'%
os.path.basename(dest.name))
def _extractall(f, path=None, file_info=None):
found = False
while True:
header = read_local_file_header(f)
if not header:
break
found = True
parts = header.filename.split('/')
if header.uncompressed_size == 0:
# Directory
f.seek(f.tell() + header.compressed_size)
if path is not None:
bdir = os.path.join(path, *parts)
if not os.path.exists(bdir):
os.makedirs(bdir)
continue
# File
if file_info is not None:
file_info[header.filename] = (f.tell(), header)
if path is not None:
bdir = os.path.join(path, *(parts[:-1]))
if not os.path.exists(bdir):
os.makedirs(bdir)
dest = os.path.join(path, *parts)
with open(dest, 'wb') as o:
if header.compression_method == ZIP_STORED:
copy_stored_file(f, header.compressed_size, o)
else:
copy_compressed_file(f, header.compressed_size, o)
else:
f.seek(f.tell() + header.compressed_size)
if not found:
raise ValueError('Not a ZIP file')
def extractall(path_or_stream, path=None):
f = path_or_stream
close_at_end = False
if not hasattr(f, 'read'):
f = open(f, 'rb')
close_at_end = True
if path is None:
path = os.getcwdu()
pos = f.tell()
try:
_extractall(f, path)
finally:
f.seek(pos)
if close_at_end:
f.close()
class LocalZipFile(object):
def __init__(self, stream):
self.file_info = OrderedDict()
_extractall(stream, file_info=self.file_info)
self.stream = stream
def open(self, name, spool_size=5*1024*1024):
if isinstance(name, LocalHeader):
name = name.filename
try:
offset, header = self.file_info[name]
except KeyError:
raise ValueError('This ZIP container has no file named: %s'%name)
self.stream.seek(offset)
dest = SpooledTemporaryFile(max_size=spool_size)
if header.compression_method == ZIP_STORED:
copy_stored_file(self.stream, header.compressed_size, dest)
else:
copy_compressed_file(self.stream, header.compressed_size, dest)
dest.seek(0)
return dest
def getinfo(self, name):
try:
offset, header = self.file_info[name]
except KeyError:
raise ValueError('This ZIP container has no file named: %s'%name)
return header
def read(self, name, spool_size=5*1024*1024):
with self.open(name, spool_size=spool_size) as f:
return f.read()
def extractall(self, path=None):
self.stream.seek(0)
_extractall(self.stream, path=(path or os.getcwdu()))
def close(self):
pass
def safe_replace(self, name, datastream, extra_replacements={},
add_missing=False):
from calibre.utils.zipfile import ZipFile, ZipInfo
replacements = {name:datastream}
replacements.update(extra_replacements)
names = frozenset(replacements.keys())
found = set([])
with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
ztemp = ZipFile(temp, 'w')
for offset, header in self.file_info.itervalues():
if header.filename in names:
zi = ZipInfo(header.filename)
zi.compress_type = header.compression_method
ztemp.writestr(zi, replacements[header.filename].read())
found.add(header.filename)
else:
ztemp.writestr(header.filename, self.read(header.filename,
spool_size=0))
if add_missing:
for name in names - found:
ztemp.writestr(name, replacements[name].read())
ztemp.close()
zipstream = self.stream
temp.seek(0)
zipstream.seek(0)
zipstream.truncate()
shutil.copyfileobj(temp, zipstream)
zipstream.flush()
if __name__ == '__main__':
extractall(sys.argv[-1])
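A minimal usage sketch of the API above (the file paths and the module's import location are assumptions):

from calibre.utils.localunzip import extractall, LocalZipFile  # assumed path

# Recover everything from a damaged EPUB into a directory:
extractall('/tmp/broken.epub', path='/tmp/out')

# Or index the members once and random-access them via the local headers:
with open('/tmp/broken.epub', 'rb') as stream:
    lz = LocalZipFile(stream)
    raw = lz.read('META-INF/container.xml')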

View File

@ -467,11 +467,11 @@ eject_drive_letter(WCHAR DriveLetter) {
DeviceNumber = -1;
hVolume = CreateFile(szVolumeAccessPath, 0,
hVolume = CreateFileW(szVolumeAccessPath, 0,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING, 0, NULL);
if (hVolume == INVALID_HANDLE_VALUE) {
PyErr_SetString(PyExc_ValueError, "Invalid handle value for drive letter");
PyErr_SetFromWindowsErr(0);
return FALSE;
}
@ -529,11 +529,17 @@ eject_drive_letter(WCHAR DriveLetter) {
static PyObject *
winutil_eject_drive(PyObject *self, PyObject *args) {
char DriveLetter;
char letter = '0';
WCHAR DriveLetter = L'0';
if (!PyArg_ParseTuple(args, "c", &DriveLetter)) return NULL;
if (!PyArg_ParseTuple(args, "c", &letter)) return NULL;
if (!eject_drive_letter((WCHAR)DriveLetter)) return NULL;
if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, &letter, 1, &DriveLetter, 1) == 0) {
PyErr_SetFromWindowsErr(0);
return NULL;
}
if (!eject_drive_letter(DriveLetter)) return NULL;
Py_RETURN_NONE;
}
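From Python the calling convention is unchanged; a usage sketch (the plugins-registry access pattern is an assumption):

from calibre.constants import plugins  # assumed: registry of compiled extensions
winutil, err = plugins['winutil']
if err:
    raise RuntimeError(err)
winutil.eject_drive(b'E')  # one byte, converted internally via MultiByteToWideChar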