Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Commit 9868fffb02: Merge from trunk
@@ -35,3 +35,7 @@ nbproject/
 .settings/
 *.DS_Store
 calibre_plugins/
+recipes/.git
+recipes/.gitignore
+recipes/README
+recipes/katalog_egazeciarz.recipe
@@ -327,9 +327,8 @@ You can browse your |app| collection on your Android device by using the
 calibre content server, which makes your collection available over the net.
 First perform the following steps in |app|
 
-* Set the :guilabel:`Preferred Output Format` in |app| to EPUB (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
-* Set the output profile to Tablet (this will work for phones as well), under :guilabel:`Preferences->Conversion->Common Options->Page Setup`
-* Convert the books you want to read on your device to EPUB format by selecting them and clicking the Convert button.
+* Set the :guilabel:`Preferred Output Format` in |app| to EPUB for normal Android devices or MOBI for Kindles (The output format can be set under :guilabel:`Preferences->Interface->Behavior`)
+* Convert the books you want to read on your device to EPUB/MOBI format by selecting them and clicking the Convert button.
 * Turn on the Content Server in |app|'s preferences and leave |app| running.
 
 Now on your Android device, open the browser and browse to
@@ -2,7 +2,9 @@ import re
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
+
 class FocusRecipe(BasicNewsRecipe):
+
     __license__ = 'GPL v3'
     __author__ = u'intromatyk <intromatyk@gmail.com>'
     language = 'pl'
@@ -12,10 +14,10 @@ class FocusRecipe(BasicNewsRecipe):
     publisher = u'Gruner + Jahr Polska'
     category = u'News'
     description = u'Newspaper'
-    category='magazine'
-    cover_url=''
-    remove_empty_feeds= True
-    no_stylesheets=True
+    category = 'magazine'
+    cover_url = ''
+    remove_empty_feeds = True
+    no_stylesheets = True
     oldest_article = 7
     max_articles_per_feed = 100000
     recursions = 0
@@ -27,15 +29,15 @@ class FocusRecipe(BasicNewsRecipe):
     simultaneous_downloads = 5
 
     r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
-    keep_only_tags =[]
-    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
+    keep_only_tags = []
+    keep_only_tags.append(dict(name='div', attrs={'id': 'cll'}))
 
-    remove_tags =[]
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
-    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
-    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
+    remove_tags = []
+    remove_tags.append(dict(name='div', attrs={'class': 'ulm noprint'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'txb'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'h2'}))
+    remove_tags.append(dict(name='ul', attrs={'class': 'txu'}))
+    remove_tags.append(dict(name='div', attrs={'class': 'ulc'}))
 
     extra_css = '''
         body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@@ -46,15 +48,14 @@ class FocusRecipe(BasicNewsRecipe):
         .fot{font-size: x-small; color: #666666;}
     '''
 
-
     feeds = [
-        ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
-        ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
-        ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
-        ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
-        ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
-        ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
-        ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
+        ('Nauka', 'http://www.focus.pl/nauka/rss/'),
+        ('Historia', 'http://www.focus.pl/historia/rss/'),
+        ('Cywilizacja', 'http://www.focus.pl/cywilizacja/rss/'),
+        ('Sport', 'http://www.focus.pl/sport/rss/'),
+        ('Technika', 'http://www.focus.pl/technika/rss/'),
+        ('Przyroda', 'http://www.focus.pl/przyroda/rss/'),
+        ('Technologie', 'http://www.focus.pl/gadzety/rss/')
     ]
 
     def skip_ad_pages(self, soup):
@@ -65,20 +66,20 @@ class FocusRecipe(BasicNewsRecipe):
             return None
 
     def get_cover_url(self):
-        soup=self.index_to_soup('http://www.focus.pl/magazyn/')
-        tag=soup.find(name='div', attrs={'class':'clr fl'})
+        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
+        tag = soup.find(name='div', attrs={'class': 'clr fl'})
         if tag:
-            self.cover_url='http://www.focus.pl/' + tag.a['href']
+            self.cover_url = 'http://www.focus.pl/' + tag.a['href']
         return getattr(self, 'cover_url', self.cover_url)
 
     def print_version(self, url):
-        if url.count ('focus.pl.feedsportal.com'):
+        if url.count('focus.pl.feedsportal.com'):
             u = url.find('focus0Bpl')
             u = 'http://www.focus.pl/' + url[u + 11:]
             u = u.replace('0C', '/')
             u = u.replace('A', '')
-            u = u.replace ('0E','-')
+            u = u.replace('0E', '-')
             u = u.replace('/nc/1//story01.htm', '/do-druku/1')
         else:
-            u = url.replace('/nc/1','/do-druku/1')
+            u = url.replace('/nc/1', '/do-druku/1')
         return u
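Note: print_version above undoes feedsportal's URL escaping by plain string substitution ('0C' encodes '/', '0E' encodes '-', stray 'A' characters are padding). A standalone sketch of the same decoding, with an invented sample URL for illustration only:

def decode_feedsportal_focus(url):
    # Mirrors the substitutions in print_version above; the input
    # below is made up and only illustrates the shape of such URLs.
    start = url.find('focus0Bpl')
    u = 'http://www.focus.pl/' + url[start + 11:]
    for enc, dec in (('0C', '/'), ('A', ''), ('0E', '-')):
        u = u.replace(enc, dec)
    return u.replace('/nc/1//story01.htm', '/do-druku/1')

print(decode_feedsportal_focus(
    'http://focus.pl.feedsportal.com/c/32992/f/532693/s/0/focus0Bpl0Cnauka0Eprzyklad/nc/1//story01.htm'))
# -> http://www.focus.pl/nauka-przyklad/do-druku/1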
@@ -1,104 +1,107 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
 
+
 class Gazeta_Wyborcza(BasicNewsRecipe):
     title = u'Gazeta Wyborcza'
-    __author__ = 'fenuks'
+    __author__ = 'fenuks, Artur Stachecki'
     language = 'pl'
-    description ='news from gazeta.pl'
-    category='newspaper'
+    description = 'news from gazeta.pl'
+    category = 'newspaper'
     publication_type = 'newspaper'
-    masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
-    INDEX='http://wyborcza.pl'
-    remove_empty_feeds= True
+    masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
+    INDEX = 'http://wyborcza.pl'
+    remove_empty_feeds = True
     oldest_article = 3
     max_articles_per_feed = 100
-    remove_javascript=True
-    no_stylesheets=True
-    ignore_duplicate_articles = {'title', 'url'}
-    keep_only_tags = dict(id=['gazeta_article', 'article'])
-    remove_tags_after = dict(id='gazeta_article_share')
-    remove_tags = [dict(attrs={'class':['artReadMore', 'gazeta_article_related_new', 'txt_upl']}), dict(id=['gazeta_article_likes', 'gazeta_article_tools', 'rel', 'gazeta_article_tags', 'gazeta_article_share', 'gazeta_article_brand', 'gazeta_article_miniatures'])]
-
+    remove_javascript = True
+    no_stylesheets = True
+    remove_tags_before = dict(id='k0')
+    remove_tags_after = dict(id='banP4')
+    remove_tags = [dict(name='div', attrs={'class':'rel_box'}), dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}), dict(name='div', attrs={'id':'footer'})]
     feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
              (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
              (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
              (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
-             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
-             (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
-             (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
-             #(u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
-             (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
-             (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
-             (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
-             (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
-             (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
-             (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
-             (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
-             (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss')
+             (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss')
              ]
 
     def skip_ad_pages(self, soup):
-        tag=soup.find(name='a', attrs={'class':'btn'})
+        tag = soup.find(name='a', attrs={'class': 'btn'})
         if tag:
-            new_soup=self.index_to_soup(tag['href'], raw=True)
+            new_soup = self.index_to_soup(tag['href'], raw=True)
             return new_soup
 
+
     def append_page(self, soup, appendtag):
-        loop=False
-        tag = soup.find('div', attrs={'id':'Str'})
-        if appendtag.find('div', attrs={'id':'Str'}):
-            nexturl=tag.findAll('a')
-            appendtag.find('div', attrs={'id':'Str'}).extract()
-            loop=True
+        loop = False
+        tag = soup.find('div', attrs={'id': 'Str'})
+        if appendtag.find('div', attrs={'id': 'Str'}):
+            nexturl = tag.findAll('a')
+            appendtag.find('div', attrs={'id': 'Str'}).extract()
+            loop = True
         if appendtag.find(id='source'):
            appendtag.find(id='source').extract()
        while loop:
-           loop=False
+           loop = False
           for link in nexturl:
               if u'następne' in link.string:
-                  url= self.INDEX + link['href']
+                  url = self.INDEX + link['href']
                  soup2 = self.index_to_soup(url)
                  pagetext = soup2.find(id='artykul')
                  pos = len(appendtag.contents)
                  appendtag.insert(pos, pagetext)
-                 tag = soup2.find('div', attrs={'id':'Str'})
-                 nexturl=tag.findAll('a')
-                 loop=True
+                 tag = soup2.find('div', attrs={'id': 'Str'})
+                 nexturl = tag.findAll('a')
+                 loop = True
 
     def gallery_article(self, appendtag):
-        tag=appendtag.find(id='container_gal')
+        tag = appendtag.find(id='container_gal')
         if tag:
-            nexturl=appendtag.find(id='gal_btn_next').a['href']
+            nexturl = appendtag.find(id='gal_btn_next').a['href']
             appendtag.find(id='gal_navi').extract()
             while nexturl:
-                soup2=self.index_to_soup(nexturl)
-                pagetext=soup2.find(id='container_gal')
-                nexturl=pagetext.find(id='gal_btn_next')
+                soup2 = self.index_to_soup(nexturl)
+                pagetext = soup2.find(id='container_gal')
+                nexturl = pagetext.find(id='gal_btn_next')
                 if nexturl:
-                    nexturl=nexturl.a['href']
+                    nexturl = nexturl.a['href']
                 pos = len(appendtag.contents)
                 appendtag.insert(pos, pagetext)
-                rem=appendtag.find(id='gal_navi')
+                rem = appendtag.find(id='gal_navi')
                 if rem:
                     rem.extract()
 
     def preprocess_html(self, soup):
         if soup.find(attrs={'class': 'piano_btn_1'}):
             return None
         else:
             self.append_page(soup, soup.body)
             if soup.find(id='container_gal'):
                 self.gallery_article(soup.body)
             return soup
 
     def print_version(self, url):
         if 'http://wyborcza.biz/biznes/' not in url:
             return url
         if url.count('rss.feedsportal.com'):
             u = url.find('wyborcza0Bpl')
             u = 'http://www.wyborcza.pl/' + url[u + 11:]
             u = u.replace('0C', '/')
             u = u.replace('A', '')
             u = u.replace('0E', '-')
             u = u.replace('0H', ',')
             u = u.replace('0I', '_')
             u = u.replace('0B', '.')
             u = u.replace('/1,', '/2029020,')
             u = u.replace('/story01.htm', '')
             print(u)
             return u
         elif 'http://wyborcza.pl/1' in url:
             return url.replace('http://wyborcza.pl/1', 'http://wyborcza.pl/2029020')
         else:
             return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
 
     def get_cover_url(self):
         soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
-        cover=soup.find(id='GWmini2')
-        soup = self.index_to_soup('http://wyborcza.pl/'+ cover.contents[3].a['href'])
-        self.cover_url='http://wyborcza.pl' + soup.img['src']
+        cover = soup.find(id='GWmini2')
+        soup = self.index_to_soup('http://wyborcza.pl/' + cover.contents[3].a['href'])
+        self.cover_url = 'http://wyborcza.pl' + soup.img['src']
         return getattr(self, 'cover_url', self.cover_url)
@@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class FocusRecipe(BasicNewsRecipe):
     __license__ = 'GPL v3'
-    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
     language = 'pl'
     version = 1
 
@@ -34,16 +34,20 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
     keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'story'}))
 
     remove_tags =[]
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleLeftBox'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'socialNewTools'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'socialTools'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxTop'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'recommendations'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'editorPicks'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks editorPicksFirst'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightText'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'articleCopyrightButton'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'articleToolBoxBottom'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'addRecommendation'}))
     remove_tags.append(dict(name = 'h3', attrs = {'id' : 'tags'}))
 
     extra_css = '''
         body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@@ -67,3 +71,4 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
 
         return start + '/' + index + '?print=tak'
 
+
@@ -1,34 +1,55 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.magick import Image
+
 class tvn24(BasicNewsRecipe):
     title = u'TVN24'
     oldest_article = 7
     max_articles_per_feed = 100
-    __author__ = 'fenuks'
+    __author__ = 'fenuks, Artur Stachecki'
     description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
     category = 'news'
     language = 'pl'
-    #masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
-    cover_url= 'http://www.userlogos.org/files/logos/Struna/TVN24.jpg'
-    extra_css = 'ul {list-style:none;} \
-                 li {list-style:none; float: left; margin: 0 0.15em;} \
-                 h2 {font-size: medium} \
-                 .date60m {float: left; margin: 0 10px 0 5px;}'
+    masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
     use_embedded_content = False
     ignore_duplicate_articles = {'title', 'url'}
-    keep_only_tags=[dict(name='h1', attrs={'class':['size30 mt10 pb10', 'size38 mt10 pb15']}), dict(name='figure', attrs={'class':'articleMainPhoto articleMainPhotoWide'}), dict(name='article', attrs={'class':['mb20', 'mb20 textArticleDefault']}), dict(name='ul', attrs={'class':'newsItem'})]
-    remove_tags = [dict(name='aside', attrs={'class':['innerArticleModule onRight cols externalContent', 'innerArticleModule center']}), dict(name='div', attrs={'class':['thumbsGallery', 'articleTools', 'article right rd7', 'heading', 'quizContent']}), dict(name='a', attrs={'class':'watchMaterial text'}), dict(name='section', attrs={'class':['quiz toCenter', 'quiz toRight']})]
 
-    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
-             (u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
+    keep_only_tags=[
+    #        dict(name='h1', attrs={'class':'size38 mt20 pb20'}),
+            dict(name='div', attrs={'class':'mainContainer'}),
+    #        dict(name='p'),
+    #        dict(attrs={'class':['size18 mt10 mb15', 'bold topicSize1', 'fromUsers content', 'textArticleDefault']})
+    ]
+    remove_tags=[
+            dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text', 'related galleryGallery align-center', 'advert block-alignment-right', 'userActions', 'socialBookmarks', 'im yourArticle fl', 'dynamicButton addComment fl', 'innerArticleModule onRight cols externalContent', 'thumbsGallery', 'relatedObject customBlockquote align-right', 'lead', 'mainRightColumn', 'articleDateContainer borderGreyBottom', 'socialMediaContainer onRight loaded', 'quizContent', 'twitter', 'facebook', 'googlePlus', 'share', 'voteResult', 'reportTitleBar bgBlue_v4 mb15', 'innerVideoModule center']}),
+            dict(name='article', attrs={'class':['singleArtPhotoCenter', 'singleArtPhotoRight', 'singleArtPhotoLeft']}),
+            dict(name='section', attrs={'id':['forum', 'innerArticle', 'quiz toCenter', 'mb20']}),
+            dict(name='div', attrs={'class':'socialMediaContainer big p20 mb20 borderGrey loaded'})
+    ]
+    remove_tags_after=[dict(name='li', attrs={'class':'share'})]
+    feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
+    #(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
 
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        tag = soup.find(name='ul', attrs={'class':'newsItem'})
-        if tag:
-            tag.name='div'
-            tag.li.name='div'
-        return soup
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
 
+    def postprocess_html(self, soup, first):
+        #process all the images
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            if img < 0:
+                raise RuntimeError('Out of memory')
+            img.type = "GrayscaleType"
+            img.save(iurl)
+        return soup
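Note: postprocess_html above converts every downloaded image to grayscale via calibre's ImageMagick wrapper, a common recipe trick to shrink downloads for e-ink devices. An equivalent sketch using Pillow instead of calibre.utils.magick (an illustration, not the recipe's actual code):

from PIL import Image

def grayscale_in_place(path):
    # Same effect as img.type = "GrayscaleType" in the recipe above:
    # convert to 8-bit grayscale and overwrite the file.
    Image.open(path).convert('L').save(path)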
@@ -3,6 +3,8 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010, matek09, matek09@gmail.com'
 __copyright__ = 'Modified 2011, Mariusz Wolek <mariusz_dot_wolek @ gmail dot com>'
+__copyright__ = 'Modified 2012, Artur Stachecki <artur.stachecki@gmail.com>'
+
 
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
@@ -11,7 +13,7 @@ class Wprost(BasicNewsRecipe):
     EDITION = 0
     FIND_LAST_FULL_ISSUE = True
     EXCLUDE_LOCKED = True
-    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'
+    ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png'
 
     title = u'Wprost'
     __author__ = 'matek09'
@@ -20,6 +22,7 @@ class Wprost(BasicNewsRecipe):
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
+    recursions = 0
 
     remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
     remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'}))
@@ -35,13 +38,15 @@ class Wprost(BasicNewsRecipe):
         (re.compile(r'\<td\>\<tr\>\<\/table\>'), lambda match: ''),
         (re.compile(r'\<table .*?\>'), lambda match: ''),
         (re.compile(r'\<tr>'), lambda match: ''),
-        (re.compile(r'\<td .*?\>'), lambda match: '')]
+        (re.compile(r'\<td .*?\>'), lambda match: ''),
+        (re.compile(r'\<div id="footer"\>.*?\</footer\>'), lambda match: '')]
 
     remove_tags =[]
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'def element-date'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'def silver'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'content-main-column-right'}))
 
+
     extra_css = '''
         .div-header {font-size: x-small; font-weight: bold}
     '''
@@ -59,27 +64,26 @@ class Wprost(BasicNewsRecipe):
         a = 0
         if self.FIND_LAST_FULL_ISSUE:
             ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
-            a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
+            a = ico_blocked[-1].findNext('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
         else:
-            a = soup.find('a', attrs={'title' : re.compile('Zobacz spis tre.ci')})
+            a = soup.find('a', attrs={'title' : re.compile(r'Spis *', re.IGNORECASE | re.DOTALL)})
         self.EDITION = a['href'].replace('/tygodnik/?I=', '')
         self.EDITION_SHORT = a['href'].replace('/tygodnik/?I=15', '')
         self.cover_url = a.img['src']
 
-
 
     def parse_index(self):
         self.find_last_issue()
         soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
         feeds = []
-        for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
+        for main_block in soup.findAll(attrs={'id': 'content-main-column-element-content'}):
             articles = list(self.find_articles(main_block))
             if len(articles) > 0:
-                section = self.tag_to_string(main_block)
+                section = self.tag_to_string(main_block.find('h3'))
                 feeds.append((section, articles))
         return feeds
 
     def find_articles(self, main_block):
-        for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
+        for a in main_block.findAll('a'):
             if a.name in "td":
                 break
             if self.EXCLUDE_LOCKED & self.is_blocked(a):
@@ -91,3 +95,4 @@ class Wprost(BasicNewsRecipe):
               'description' : ''
             }
 
+
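Note: the parse_index changes above lean on the contract BasicNewsRecipe expects: parse_index must return a list of (section_title, articles) pairs, where each article is a dict with at least 'title' and 'url'. A minimal sketch of that contract (the section name and URL are placeholders):

from calibre.web.feeds.news import BasicNewsRecipe

class MinimalIndexRecipe(BasicNewsRecipe):
    title = u'Minimal parse_index example'

    def parse_index(self):
        # Each feed is (section_title, list_of_article_dicts); 'title'
        # and 'url' are required, 'date' and 'description' optional.
        articles = [{'title': 'Placeholder article',
                     'url': 'http://example.com/story',
                     'date': '', 'description': ''}]
        return [('Placeholder section', articles)]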
@@ -901,6 +901,9 @@ class Device(DeviceConfig, DevicePlugin):
         for d in drives:
             try:
                 winutil.eject_drive(bytes(d)[0])
+            except Exception as e:
+                try:
+                    prints(as_unicode(e))
                 except:
                     pass
 
@@ -150,8 +150,15 @@ class EPUBInput(InputFormatPlugin):
         from calibre import walk
         from calibre.ebooks import DRMError
         from calibre.ebooks.metadata.opf2 import OPF
-        zf = ZipFile(stream)
-        zf.extractall(os.getcwdu())
+        try:
+            zf = ZipFile(stream)
+            zf.extractall(os.getcwdu())
+        except:
+            log.exception('EPUB appears to be invalid ZIP file, trying a'
+                    ' more forgiving ZIP parser')
+            from calibre.utils.localunzip import extractall
+            stream.seek(0)
+            extractall(stream)
         encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
         opf = self.find_opf()
         if opf is None:
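Note: the hunk above follows a try-strict-then-fall-back shape: the fast ZipFile parser runs first, and only on failure is the stream rewound and handed to the slower, forgiving pure-Python parser. A minimal sketch of the same shape, mirroring the calls visible in the hunk (not the plugin itself):

import zipfile

def extract_epub(stream):
    # Fast path: the strict parser, extracting into the current directory.
    try:
        zipfile.ZipFile(stream).extractall()
        return
    except Exception:
        pass
    # Slow path: rewind and retry with calibre's forgiving parser,
    # exactly as EPUBInput does above.
    stream.seek(0)
    from calibre.utils.localunzip import extractall
    extractall(stream)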
@@ -10,6 +10,7 @@ from cStringIO import StringIO
 from contextlib import closing
 
 from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
+from calibre.utils.localunzip import LocalZipFile
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPF
@@ -105,6 +106,9 @@ class OCFReader(OCF):
 
 class OCFZipReader(OCFReader):
     def __init__(self, stream, mode='r', root=None):
+        if isinstance(stream, (LocalZipFile, ZipFile)):
+            self.archive = stream
+        else:
             try:
                 self.archive = ZipFile(stream, mode=mode)
             except BadZipfile:
@@ -119,8 +123,18 @@ class OCFZipReader(OCFReader):
         super(OCFZipReader, self).__init__()
 
     def open(self, name, mode='r'):
+        if isinstance(self.archive, LocalZipFile):
+            return self.archive.open(name)
         return StringIO(self.archive.read(name))
 
+
+def get_zip_reader(stream, root=None):
+    try:
+        zf = ZipFile(stream, mode='r')
+    except:
+        stream.seek(0)
+        zf = LocalZipFile(stream)
+    return OCFZipReader(zf, root=root)
+
 class OCFDirReader(OCFReader):
     def __init__(self, path):
         self.root = path
@@ -184,7 +198,12 @@ def render_cover(opf, opf_path, zf, reader=None):
 def get_cover(opf, opf_path, stream, reader=None):
     raster_cover = opf.raster_cover
     stream.seek(0)
-    zf = ZipFile(stream)
+    try:
+        zf = ZipFile(stream)
+    except:
+        stream.seek(0)
+        zf = LocalZipFile(stream)
+
     if raster_cover:
         base = posixpath.dirname(opf_path)
         cpath = posixpath.normpath(posixpath.join(base, raster_cover))
@@ -207,7 +226,7 @@ def get_cover(opf, opf_path, stream, reader=None):
 def get_metadata(stream, extract_cover=True):
     """ Return metadata as a :class:`Metadata` object """
     stream.seek(0)
-    reader = OCFZipReader(stream)
+    reader = get_zip_reader(stream)
     mi = reader.opf.to_book_metadata()
     if extract_cover:
         try:
@@ -232,7 +251,7 @@ def _write_new_cover(new_cdata, cpath):
 
 def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
     stream.seek(0)
-    reader = OCFZipReader(stream, root=os.getcwdu())
+    reader = get_zip_reader(stream, root=os.getcwdu())
     raster_cover = reader.opf.raster_cover
     mi = MetaInformation(mi)
     new_cdata = None
@@ -283,6 +302,10 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
         reader.opf.timestamp = mi.timestamp
 
     newopf = StringIO(reader.opf.render())
-    safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
-            extra_replacements=replacements)
+    if isinstance(reader.archive, LocalZipFile):
+        reader.archive.safe_replace(reader.container[OPF.MIMETYPE], newopf,
+                extra_replacements=replacements)
+    else:
+        safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
+                extra_replacements=replacements)
     try:
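Note: after these hunks every metadata entry point builds its reader through get_zip_reader, so callers no longer care whether the strict ZipFile or the forgiving LocalZipFile ended up parsing the container. A short usage sketch (the file name is hypothetical):

from calibre.ebooks.metadata.epub import get_metadata

# Works even when the EPUB's ZIP central directory is damaged, because
# get_metadata now goes through get_zip_reader() internally.
with open('some_book.epub', 'rb') as stream:  # hypothetical path
    mi = get_metadata(stream)
    print(mi.title, mi.authors)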
@@ -239,10 +239,11 @@ class PluginWidget(QWidget,Ui_Form):
 
     def initialize(self, name, db):
         '''
 
         CheckBoxControls (c_type: check_box):
-            ['generate_titles','generate_series','generate_genres',
-            'generate_recently_added','generate_descriptions','include_hr']
+            ['cross_reference_authors',
+            'generate_titles','generate_series','generate_genres',
+            'generate_recently_added','generate_descriptions',
+            'include_hr']
         ComboBoxControls (c_type: combo_box):
             ['exclude_source_field','header_note_source_field',
             'merge_source_field']
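Note: the docstring's inventory of widget names grouped by control type is what lets the catalog widget save and restore options generically instead of naming each control by hand. A simplified sketch of that dispatch pattern (the helper and the settings mapping are illustrative, not calibre's actual code):

CONTROLS = [('cross_reference_authors', 'check_box'),
            ('generate_titles', 'check_box'),
            ('exclude_source_field', 'combo_box')]

def restore_settings(widget, saved):
    # Generic restore: dispatch on control type; `saved` maps
    # option name -> previously stored value.
    for name, c_type in CONTROLS:
        control = getattr(widget, name)
        value = saved.get(name)
        if c_type == 'check_box':
            control.setChecked(bool(value))
        elif c_type == 'combo_box':
            control.setCurrentIndex(max(0, control.findText(value or '')))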
@@ -305,7 +305,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
       <string>Other options</string>
      </property>
      <layout class="QGridLayout" name="gridLayout_3">
-      <item row="2" column="1">
+      <item row="3" column="1">
       <layout class="QHBoxLayout" name="merge_with_comments_hl">
        <item>
         <widget class="QComboBox" name="merge_source_field">
@@ -372,7 +372,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
        </item>
       </layout>
      </item>
-     <item row="2" column="0">
+     <item row="3" column="0">
      <widget class="QLabel" name="label_9">
       <property name="minimumSize">
        <size>
@@ -397,7 +397,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
       </property>
      </widget>
     </item>
-    <item row="0" column="0">
+    <item row="1" column="0">
     <widget class="QLabel" name="label_4">
      <property name="minimumSize">
       <size>
@@ -413,7 +413,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
      </property>
     </widget>
    </item>
-   <item row="0" column="1">
+   <item row="1" column="1">
    <layout class="QHBoxLayout" name="replace_cover_hl">
     <item>
      <widget class="QRadioButton" name="generate_new_cover">
@@ -447,7 +447,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
    </item>
   </layout>
  </item>
- <item row="1" column="0">
+ <item row="2" column="0">
  <widget class="QLabel" name="label_3">
   <property name="text">
    <string>E&xtra Description note:</string>
@@ -460,7 +460,7 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
   </property>
  </widget>
 </item>
-<item row="1" column="1">
+<item row="2" column="1">
 <layout class="QHBoxLayout" name="horizontalLayout">
  <item>
   <widget class="QComboBox" name="header_note_source_field">
@@ -561,6 +561,27 @@ The default pattern \[.+\]|\+ excludes tags of the form [tag], e.g., [Test book]
  </item>
  </layout>
 </item>
+<item row="0" column="0">
+ <widget class="QLabel" name="label_2">
+  <property name="text">
+   <string>Author cross-references:</string>
+  </property>
+  <property name="alignment">
+   <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
+  </property>
+ </widget>
+</item>
+<item row="0" column="1">
+ <layout class="QHBoxLayout" name="cross_references_hl">
+  <item>
+   <widget class="QCheckBox" name="cross_reference_authors">
+    <property name="text">
+     <string>For books with multiple authors, list each author separately</string>
+    </property>
+   </widget>
+  </item>
+ </layout>
+</item>
 </layout>
 </widget>
 </item>
@@ -6,102 +6,19 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from lxml import html
-
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonDEKindleStore(StorePlugin):
+class AmazonDEKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'charhale0a-21'}
-        store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de'
-                      '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454'
-                      '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    aff_id = {'tag': 'charhale0a-21'}
+    store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
+                  '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031')
+    store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8'
+                          '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de'
+                          '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742')
+    search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-
-                author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
-                if author.startswith('von '):
-                    author = author[4:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-
-                yield s
-
-    def get_details(self, search_result, timeout):
     drm_search_text = u'Gleichzeitige Verwendung von Geräten'
     drm_free_text = u'Keine Einschränkung'
-        url = 'http://amazon.de/dp/'
-
-        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           drm_search_text + '")])'):
-                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               drm_free_text + '") and contains(b, "' +
-                               drm_search_text + '")])'):
-                    search_result.drm = SearchResult.DRM_UNLOCKED
-                else:
-                    search_result.drm = SearchResult.DRM_UNKNOWN
-            else:
-                search_result.drm = SearchResult.DRM_LOCKED
-        return True
@@ -6,78 +6,17 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from lxml import html
-
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonESKindleStore(StorePlugin):
+class AmazonESKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'charhale09-21'}
-        store_link = 'http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790' % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.es/gp/redirect.html?ie=UTF8&location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3626&creative=24790' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    aff_id = {'tag': 'charhale09-21'}
+    store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790')
+    store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s'
+                          '&linkCode=ur2&camp=3626&creative=24790')
+    search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('de '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
@@ -6,79 +6,16 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
-
-from lxml import html
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonFRKindleStore(StorePlugin):
+class AmazonFRKindleStore(AmazonUKKindleStore):
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
     aff_id = {'tag': 'charhale-21'}
     store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id
 
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738'
     search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('de '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
@@ -6,78 +6,17 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
+from calibre.gui2.store.stores.amazon_uk_plugin import AmazonUKKindleStore
 
-from lxml import html
-
-from PyQt4.Qt import QUrl
-
-from calibre import browser
-from calibre.gui2 import open_url
-from calibre.gui2.store import StorePlugin
-from calibre.gui2.store.search_result import SearchResult
 
-class AmazonITKindleStore(StorePlugin):
+class AmazonITKindleStore(AmazonUKKindleStore):
     '''
    For comments on the implementation, please see amazon_plugin.py
     '''
 
-    def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'httpcharles07-21'}
-        store_link = 'http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322' % aff_id
-        if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.it/gp/redirect.html?ie=UTF8&location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=3370&creative=23322' % aff_id
-        open_url(QUrl(store_link))
-
-    def search(self, query, max_results=10, timeout=60):
+    aff_id = {'tag': 'httpcharles07-21'}
+    store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&'
+                  'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322')
+    store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&'
+                          'linkCode=ur2&camp=3370&creative=23322')
+    search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
-        br = browser()
-
-        counter = max_results
-        with closing(br.open(url, timeout=timeout)) as f:
-            # doc = html.fromstring(f.read().decode('latin-1', 'replace'))
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
-
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
-            cover_xpath = './/img[@class="productImage"]/@src'
-
-            for data in doc.xpath(data_xpath):
-                if counter <= 0:
-                    break
-
-                # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
-                # to explicitly check if the item is a Kindle book and ignore it
-                # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
-                    continue
-
-                # We must have an asin otherwise we can't easily reference the
-                # book later.
-                asin = ''.join(data.xpath("@name"))
-
-                cover_url = ''.join(data.xpath(cover_xpath))
-
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
-                author = unicode(''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()')))
-                if author.startswith('di '):
-                    author = author[3:]
-
-                counter -= 1
-
-                s = SearchResult()
-                s.cover_url = cover_url.strip()
-                s.title = title.strip()
-                s.author = author.strip()
-                s.price = price.strip()
-                s.detail_item = asin.strip()
-                s.formats = 'Kindle'
-                s.drm = SearchResult.DRM_UNKNOWN
-
-                yield s
@@ -6,8 +6,9 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-from contextlib import closing
 import re
 
+from contextlib import closing
 from lxml import html
 
 from PyQt4.Qt import QUrl
@@ -18,57 +19,80 @@ from calibre.gui2.store import StorePlugin
 from calibre.gui2.store.search_result import SearchResult
 
 class AmazonUKKindleStore(StorePlugin):
+    aff_id = {'tag': 'calcharles-21'}
+    store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                  'location=http://www.amazon.co.uk/Kindle-eBooks/b?'
+                  'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&'
+                  'linkCode=ur2&camp=1634&creative=19450')
+    store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&'
+                          'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&'
+                          'linkCode=ur2&camp=1634&creative=6738')
+    search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
+
     '''
     For comments on the implementation, please see amazon_plugin.py
     '''
 
     def open(self, parent=None, detail_item=None, external=False):
-        aff_id = {'tag': 'calcharles-21'}
-        store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/Kindle-eBooks/b?ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&linkCode=ur2&camp=1634&creative=19450' % aff_id
-
+        store_link = self.store_link % self.aff_id
         if detail_item:
-            aff_id['asin'] = detail_item
-            store_link = 'http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
+            self.aff_id['asin'] = detail_item
+            store_link = self.store_link_details % self.aff_id
        open_url(QUrl(store_link))
 
     def search(self, query, max_results=10, timeout=60):
-        search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()
 
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            # Apparently amazon Europe is responding in UTF-8 now
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))
 
-            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
-            format_xpath = './/span[@class="format"]/text()'
+            data_xpath = '//div[contains(@class, "prod")]'
+            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
+            asin_xpath = './/div[@class="image"]/a[1]'
             cover_xpath = './/img[@class="productImage"]/@src'
+            title_xpath = './/h3[@class="newaps"]/a//text()'
+            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
+            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'
 
             for data in doc.xpath(data_xpath):
                 if counter <= 0:
                     break
 
                 # Even though we are searching digital-text only Amazon will still
-                # put in results for non Kindle books (author pages). So we need
+                # put in results for non Kindle books (author pages). Se we need
                 # to explicitly check if the item is a Kindle book and ignore it
                 # if it isn't.
-                format = ''.join(data.xpath(format_xpath))
-                if 'kindle' not in format.lower():
+                format_ = ''.join(data.xpath(format_xpath))
+                if 'kindle' not in format_.lower():
                     continue
 
                 # We must have an asin otherwise we can't easily reference the
                 # book later.
-                asin = ''.join(data.xpath("@name"))
+                asin_href = None
+                asin_a = data.xpath(asin_xpath)
+                if asin_a:
+                    asin_href = asin_a[0].get('href', '')
+                    m = re.search(r'/dp/(?P<asin>.+?)(/|$)', asin_href)
+                    if m:
+                        asin = m.group('asin')
+                    else:
+                        continue
+                else:
+                    continue
 
                 cover_url = ''.join(data.xpath(cover_xpath))
 
-                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
-                price = ''.join(data.xpath('.//div[@class="newPrice"]/span[contains(@class, "price")]/text()'))
+                title = ''.join(data.xpath(title_xpath))
+                author = ''.join(data.xpath(author_xpath))
+                try:
+                    author = author.split('by ', 1)[1].split(" (")[0]
+                except:
+                    pass
 
-                author = ''.join(data.xpath('.//h3[@class="title"]/span[@class="ptBrand"]/text()'))
-                if author.startswith('by '):
-                    author = author[3:]
+                price = ''.join(data.xpath(price_xpath))
 
                 counter -= 1
 
@@ -78,37 +102,10 @@ class AmazonUKKindleStore(StorePlugin):
                 s.author = author.strip()
                 s.price = price.strip()
                 s.detail_item = asin.strip()
+                s.drm = SearchResult.DRM_UNKNOWN
                 s.formats = 'Kindle'
 
                 yield s
 
     def get_details(self, search_result, timeout):
-        # We might already have been called.
-        if search_result.drm:
-            return
-
-        url = 'http://amazon.co.uk/dp/'
-        drm_search_text = u'Simultaneous Device Usage'
-        drm_free_text = u'Unlimited'
-
-        br = browser()
-        with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
-            idata = html.fromstring(nf.read())
-            if not search_result.author:
-                search_result.author = ''.join(idata.xpath('//div[@class="buying" and contains(., "Author")]/a/text()'))
-            is_kindle = idata.xpath('boolean(//div[@class="buying"]/h1/span/span[contains(text(), "Kindle Edition")])')
-            if is_kindle:
-                search_result.formats = 'Kindle'
-            if idata.xpath('boolean(//div[@class="content"]//li/b[contains(text(), "' +
-                           drm_search_text + '")])'):
-                if idata.xpath('boolean(//div[@class="content"]//li[contains(., "' +
-                               drm_free_text + '") and contains(b, "' +
-                               drm_search_text + '")])'):
-                    search_result.drm = SearchResult.DRM_UNLOCKED
-                else:
-                    search_result.drm = SearchResult.DRM_UNKNOWN
-            else:
-                search_result.drm = SearchResult.DRM_LOCKED
-        return True
-
-
+        pass
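Note: with the refactor above, the country stores share all scraping logic through AmazonUKKindleStore and differ only in class attributes (aff_id, store_link, store_link_details, search_url). A minimal sketch of that pattern, with placeholder URLs:

class BaseStore(object):
    # Shared behaviour reads its configuration from class attributes.
    aff_id = {'tag': 'base-tag'}
    store_link = 'http://example.com/store?tag=%(tag)s'  # placeholder

    def open_store(self):
        return self.store_link % self.aff_id

class CountryStore(BaseStore):
    # A per-country plugin overrides the data, not the logic.
    aff_id = {'tag': 'country-tag'}
    store_link = 'http://example.de/store?tag=%(tag)s'  # placeholder

print(CountryStore().open_store())
# -> http://example.de/store?tag=country-tag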
@ -25,7 +25,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://ad.zanox.com/ppc/?18817073C15644254T'
        url_details = ('http://ad.zanox.com/ppc/?18817073C15644254T&ULP=[['
                       'http://www.libri.de/shop/action/productDetails?artiId={0}]]')
                       'http://www.ebook.de/shop/action/productDetails?artiId={0}]]')

        if external or self.config.get('open_external', False):
            if detail_item:

@ -41,33 +41,38 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        url = ('http://www.libri.de/shop/action/quickSearch?facetNodeId=6'
               '&mainsearchSubmit=Los!&searchString=' + urllib2.quote(query))
        url = ('http://www.ebook.de/de/pathSearch?nav=52122&searchString='
               + urllib2.quote(query))
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "item")]'):
            for data in doc.xpath('//div[contains(@class, "articlecontainer")]'):
                if counter <= 0:
                    break

                details = data.xpath('./div[@class="beschreibungContainer"]')
                details = data.xpath('./div[@class="articleinfobox"]')
                if not details:
                    continue
                details = details[0]
                id = ''.join(details.xpath('./div[@class="text"]/a/@name')).strip()
                if not id:
                id_ = ''.join(details.xpath('./a/@name')).strip()
                if not id_:
                    continue
                cover_url = ''.join(details.xpath('.//div[@class="coverImg"]/a/img/@src'))
                title = ''.join(details.xpath('./div[@class="text"]/span[@class="titel"]/a/text()')).strip()
                author = ''.join(details.xpath('./div[@class="text"]/span[@class="author"]/text()')).strip()
                title = ''.join(details.xpath('.//a[@class="su1_c_l_titel"]/text()')).strip()

                author = ''.join(details.xpath('.//div[@class="author"]/text()')).strip()
                if author.startswith('von'):
                    author = author[4:]

                pdf = details.xpath(
                    'boolean(.//span[@class="format" and contains(text(), "pdf")]/text())')
                    'boolean(.//span[@class="bindername" and contains(text(), "pdf")]/text())')
                epub = details.xpath(
                    'boolean(.//span[@class="format" and contains(text(), "epub")]/text())')
                    'boolean(.//span[@class="bindername" and contains(text(), "epub")]/text())')
                mobi = details.xpath(
                    'boolean(.//span[@class="format" and contains(text(), "mobipocket")]/text())')
                    'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')

                cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src'))
                price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip()

                counter -= 1

@ -78,7 +83,7 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
                    s.author = author.strip()
                    s.price = price
                    s.drm = SearchResult.DRM_UNKNOWN
                    s.detail_item = id
                    s.detail_item = id_
                    formats = []
                    if epub:
                        formats.append('ePub')
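The url_details template above is presumably expanded with the store's article id via str.format; a minimal sketch with a made-up artiId:

# Sketch: how the zanox deep-link template above is presumably filled in
# for a detail page; '12345' is a made-up artiId.
url_details = ('http://ad.zanox.com/ppc/?18817073C15644254T&ULP=[['
               'http://www.ebook.de/shop/action/productDetails?artiId={0}]]')
print(url_details.format('12345'))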
@ -41,6 +41,13 @@ class EPUB_MOBI(CatalogPlugin):
                help = _('Title of generated catalog used as title in metadata.\n'
                "Default: '%default'\n"
                "Applies to: AZW3, ePub, MOBI output formats")),
          Option('--cross-reference-authors',
                default=False,
                dest='cross_reference_authors',
                action = 'store_true',
                help=_("Create cross-references in Authors section for books with multiple authors.\n"
                "Default: '%default'\n"
                "Applies to: AZW3, ePub, MOBI output formats")),
          Option('--debug-pipeline',
                default=None,
                dest='debug_pipeline',

@ -58,7 +65,6 @@ class EPUB_MOBI(CatalogPlugin):
                help=_("Regex describing tags to exclude as genres.\n"
                "Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
                "Applies to: AZW3, ePub, MOBI output formats")),

          Option('--exclusion-rules',
                default="(('Catalogs','Tags','Catalog'),)",
                dest='exclusion_rules',

@ -72,7 +78,6 @@ class EPUB_MOBI(CatalogPlugin):
                "When multiple rules are defined, all rules will be applied.\n"
                "Default: \n" + '"' + '%default' + '"' + "\n"
                "Applies to AZW3, ePub, MOBI output formats")),

          Option('--generate-authors',
                default=False,
                dest='generate_authors',

@ -318,8 +323,8 @@ class EPUB_MOBI(CatalogPlugin):
            build_log.append(" opts:")
            for key in keys:
                if key in ['catalog_title','author_clip','connected_kindle','creator',
                           'description_clip','exclude_book_marker','exclude_genre',
                           'exclude_tags','exclusion_rules', 'fmt',
                           'cross_reference_authors','description_clip','exclude_book_marker',
                           'exclude_genre','exclude_tags','exclusion_rules', 'fmt',
                           'header_note_source_field','merge_comments_rule',
                           'output_profile','prefix_rules','read_book_marker',
                           'search_text','sort_by','sort_descriptions_by_author','sync',

@ -14,11 +14,12 @@ from calibre.customize.conversion import DummyReporter
from calibre.customize.ui import output_profiles
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.metadata import author_to_author_sort
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, is_date_undefined, now as nowf
from calibre.utils.filenames import ascii_text
from calibre.utils.filenames import ascii_text, shorten_components_to
from calibre.utils.icu import capitalize, collation_order, sort_key
from calibre.utils.magick.draw import thumbnail
from calibre.utils.zipfile import ZipFile

@ -109,6 +110,7 @@ class CatalogBuilder(object):
        self.stylesheet = stylesheet
        self.cache_dir = os.path.join(config_dir, 'caches', 'catalog')
        self.catalog_path = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
        self.content_dir = os.path.join(self.catalog_path, "content")
        self.excluded_tags = self.get_excluded_tags()
        self.generate_for_kindle_azw3 = True if (_opts.fmt == 'azw3' and
                                                 _opts.output_profile and

@ -127,12 +129,13 @@ class CatalogBuilder(object):
        self.books_by_title = None
        self.books_by_title_no_series_prefix = None
        self.books_to_catalog = None
        self.content_dir = os.path.join(self.catalog_path, "content")
        self.current_step = 0.0
        self.error = []
        self.generate_recently_read = False
        self.genres = []
        self.genre_tags_dict = None
        self.genre_tags_dict = \
            self.filter_db_tags(max_len = 245 - len("%s/Genre_.html" % self.content_dir)) \
            if self.opts.generate_genres else None
        self.html_filelist_1 = []
        self.html_filelist_2 = []
        self.merge_comments_rule = dict(zip(['field','position','hr'],

@ -505,7 +508,7 @@ class CatalogBuilder(object):
        if not os.path.isdir(images_path):
            os.makedirs(images_path)

    def detect_author_sort_mismatches(self):
    def detect_author_sort_mismatches(self, books_to_test):
        """ Detect author_sort mismatches.

        Sort by author, look for inconsistencies in author_sort among

@ -513,17 +516,18 @@ class CatalogBuilder(object):
        annoyance for EPUB.

        Inputs:
         self.books_to_catalog (list): list of books to catalog
         books_by_author (list): list of books to test, possibly unsorted

        Output:
         self.books_by_author (list): sorted by author
         (none)

        Exceptions:
         AuthorSortMismatchException: author_sort mismatch detected
        """

        self.books_by_author = sorted(list(self.books_to_catalog), key=self._kf_books_by_author_sorter_author)
        authors = [(record['author'], record['author_sort']) for record in self.books_by_author]
        books_by_author = sorted(list(books_to_test), key=self._kf_books_by_author_sorter_author)

        authors = [(record['author'], record['author_sort']) for record in books_by_author]
        current_author = authors[0]
        for (i,author) in enumerate(authors):
            if author != current_author and i:

@ -701,6 +705,7 @@ class CatalogBuilder(object):
    def fetch_books_by_author(self):
        """ Generate a list of books sorted by author.

        For books with multiple authors, relist book with additional authors.
        Sort the database by author. Report author_sort inconsistencies as warning when
        building EPUB or MOBI, error when building MOBI. Collect a list of unique authors
        to self.authors.

@ -720,25 +725,30 @@ class CatalogBuilder(object):

        self.update_progress_full_step(_("Sorting database"))

        self.detect_author_sort_mismatches()
        books_by_author = list(self.books_to_catalog)
        self.detect_author_sort_mismatches(books_by_author)
        if self.opts.cross_reference_authors:
            books_by_author = self.relist_multiple_authors(books_by_author)

        #books_by_author = sorted(list(books_by_author), key=self._kf_books_by_author_sorter_author)

        # Sort authors using sort_key to normalize accented letters
        # Determine the longest author_sort length before sorting
        asl = [i['author_sort'] for i in self.books_by_author]
        asl = [i['author_sort'] for i in books_by_author]
        las = max(asl, key=len)
        self.books_by_author = sorted(self.books_to_catalog,

        books_by_author = sorted(books_by_author,
            key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))

        if self.DEBUG and self.opts.verbose:
            tl = [i['title'] for i in self.books_by_author]
            tl = [i['title'] for i in books_by_author]
            lt = max(tl, key=len)
            fs = '{:<6}{:<%d} {:<%d} {!s}' % (len(lt),len(las))
            print(fs.format('','Title','Author','Series'))
            for i in self.books_by_author:
            for i in books_by_author:
                print(fs.format('', i['title'],i['author_sort'],i['series']))

        # Build the unique_authors set from existing data
        authors = [(record['author'], capitalize(record['author_sort'])) for record in self.books_by_author]
        authors = [(record['author'], capitalize(record['author_sort'])) for record in books_by_author]

        # authors[] contains a list of all book authors, with multiple entries for multiple books by author
        # authors[]: (([0]:friendly [1]:sort))

@ -776,6 +786,7 @@ class CatalogBuilder(object):
                             author[2])).encode('utf-8'))

        self.authors = unique_authors
        self.books_by_author = books_by_author
        return True

    def fetch_books_by_title(self):

@ -863,15 +874,15 @@ class CatalogBuilder(object):
            this_title['series_index'] = 0.0

        this_title['title_sort'] = self.generate_sort_title(this_title['title'])
        if 'authors' in record:
            # from calibre.ebooks.metadata import authors_to_string
            # return authors_to_string(self.authors)

        if 'authors' in record:
            this_title['authors'] = record['authors']
            # Synthesize author attribution from authors list
            if record['authors']:
                this_title['author'] = " & ".join(record['authors'])
            else:
                this_title['author'] = 'Unknown'
                this_title['author'] = _('Unknown')
                this_title['authors'] = [this_title['author']]

        if 'author_sort' in record and record['author_sort'].strip():
            this_title['author_sort'] = record['author_sort']

@ -1093,7 +1104,7 @@ class CatalogBuilder(object):

        self.bookmarked_books = bookmarks

    def filter_db_tags(self):
    def filter_db_tags(self, max_len):
        """ Remove excluded tags from data set, return normalized genre list.

        Filter all db tags, removing excluded tags supplied in opts.

@ -1101,13 +1112,13 @@ class CatalogBuilder(object):
        tags are flattened to alphanumeric ascii_text.

        Args:
         (none)
         max_len: maximum length of normalized tag to fit within OS constraints

        Return:
         genre_tags_dict (dict): dict of filtered, normalized tags in data set
        """

        def _format_tag_list(tags, indent=2, line_break=70, header='Tag list'):
        def _format_tag_list(tags, indent=1, line_break=70, header='Tag list'):
            def _next_tag(sorted_tags):
                for (i, tag) in enumerate(sorted_tags):
                    if i < len(tags) - 1:

@ -1126,6 +1137,31 @@ class CatalogBuilder(object):
                    out_str = ' ' * (indent + 1)
            return ans + out_str

        def _normalize_tag(tag, max_len):
            """ Generate an XHTML-legal anchor string from tag.

            Parse tag for non-ascii, convert to unicode name.

            Args:
             tags (str): tag name possible containing symbols
             max_len (int): maximum length of tag

            Return:
             normalized (str): unicode names substituted for non-ascii chars,
             clipped to max_len
            """

            normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
            if re.search('\W',normalized):
                normalized = ''
                for c in massaged:
                    if re.search('\W',c):
                        normalized += self.generate_unicode_name(c)
                    else:
                        normalized += c
            shortened = shorten_components_to(max_len, [normalized])[0]
            return shortened

        # Entry point
        normalized_tags = []
        friendly_tags = []

@ -1144,7 +1180,7 @@ class CatalogBuilder(object):
            if tag == ' ':
                continue

            normalized_tags.append(self.normalize_tag(tag))
            normalized_tags.append(_normalize_tag(tag, max_len))
            friendly_tags.append(tag)

        genre_tags_dict = dict(zip(friendly_tags,normalized_tags))

@ -1941,8 +1977,6 @@ class CatalogBuilder(object):

        self.update_progress_full_step(_("Genres HTML"))

        self.genre_tags_dict = self.filter_db_tags()

        # Extract books matching filtered_tags
        genre_list = []
        for friendly_tag in sorted(self.genre_tags_dict, key=sort_key):

@ -2024,10 +2058,11 @@ class CatalogBuilder(object):
                    books_by_current_author += 1

            # Write the genre book list as an article
            titles_spanned = self.generate_html_by_genre(genre, True if index==0 else False,
            outfile = "%s/Genre_%s.html" % (self.content_dir, genre)
            titles_spanned = self.generate_html_by_genre(genre,
                                                         True if index==0 else False,
                                                         genre_tag_set[genre],
                                                         "%s/Genre_%s.html" % (self.content_dir,
                                                                               genre))
                                                         outfile)

            tag_file = "content/Genre_%s.html" % genre
            master_genre_list.append({'tag':genre,

@ -2549,7 +2584,7 @@ class CatalogBuilder(object):
            for (i, tag) in enumerate(sorted(book.get('tags', []))):
                aTag = Tag(_soup,'a')
                if self.opts.generate_genres:
                    aTag['href'] = "Genre_%s.html" % self.normalize_tag(tag)
                    aTag['href'] = "Genre_%s.html" % self.genre_tags_dict[tag]
                aTag.insert(0,escape(NavigableString(tag)))
                genresTag.insert(gtc, aTag)
                gtc += 1

@ -4603,28 +4638,6 @@ class CatalogBuilder(object):

        return merged

    def normalize_tag(self, tag):
        """ Generate an XHTML-legal anchor string from tag.

        Parse tag for non-ascii, convert to unicode name.

        Args:
         tags (str): tag name possible containing symbols

        Return:
         normalized (str): unicode names substituted for non-ascii chars
        """

        normalized = massaged = re.sub('\s','',ascii_text(tag).lower())
        if re.search('\W',normalized):
            normalized = ''
            for c in massaged:
                if re.search('\W',c):
                    normalized += self.generate_unicode_name(c)
                else:
                    normalized += c
        return normalized

    def process_exclusions(self, data_set):
        """ Filter data_set based on exclusion_rules.

@ -4697,6 +4710,43 @@ class CatalogBuilder(object):
        else:
            return data_set

    def relist_multiple_authors(self, books_by_author):
        """ Create multiple entries for books with multiple authors

        Given a list of books by author, scan list for books with multiple
        authors. Add a cloned copy of the book per additional author.

        Args:
         books_by_author (list): book list possibly containing books
         with multiple authors

        Return:
         (list): books_by_author with additional cloned entries for books with
         multiple authors
        """

        multiple_author_books = []

        # Find the multiple author books
        for book in books_by_author:
            if len(book['authors']) > 1:
                multiple_author_books.append(book)

        for book in multiple_author_books:
            cloned_authors = list(book['authors'])
            for x, author in enumerate(book['authors']):
                if x:
                    first_author = cloned_authors.pop(0)
                    cloned_authors.append(first_author)
                    new_book = deepcopy(book)
                    new_book['author'] = ' & '.join(cloned_authors)
                    new_book['authors'] = list(cloned_authors)
                    asl = [author_to_author_sort(auth) for auth in cloned_authors]
                    new_book['author_sort'] = ' & '.join(asl)
                    books_by_author.append(new_book)

        return books_by_author

    def update_progress_full_step(self, description):
        """ Update calibre's job status UI.
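A standalone trace of the rotation in relist_multiple_authors above, with book records reduced to the fields the method touches and author_to_author_sort replaced by a trivial stand-in so the sketch runs outside calibre:

# Standalone trace of the author-rotation logic; author_to_author_sort is
# faked and the book dict is a minimal stand-in for calibre's records.
from copy import deepcopy

author_to_author_sort = lambda a: ', '.join(reversed(a.split()))  # stand-in

books = [{'author': 'A One & B Two', 'authors': ['A One', 'B Two'],
          'author_sort': 'One, A & Two, B', 'title': 'Demo'}]

for book in [b for b in books if len(b['authors']) > 1]:
    cloned_authors = list(book['authors'])
    for x, author in enumerate(book['authors']):
        if x:  # each author after the first gets a rotated clone
            cloned_authors.append(cloned_authors.pop(0))
            new_book = deepcopy(book)
            new_book['author'] = ' & '.join(cloned_authors)
            new_book['authors'] = list(cloned_authors)
            new_book['author_sort'] = ' & '.join(
                author_to_author_sort(a) for a in cloned_authors)
            books.append(new_book)

for b in books:
    print(b['author'])  # 'A One & B Two', then the clone 'B Two & A One'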
153  src/calibre/utils/fonts/sfnt/cff.py  Normal file
@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from struct import unpack_from, unpack

from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont

# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf

class CFF(object):

    def __init__(self, raw):
        (self.major_version, self.minor_version, self.header_size,
         self.offset_size) = unpack_from(b'>4B', raw)
        if (self.major_version, self.minor_version) != (1, 0):
            raise UnsupportedFont('The CFF table has unknown version: '
                    '(%d, %d)'%(self.major_version, self.minor_version))
        offset = self.header_size

        # Read Names Index
        self.font_names = Index(raw, offset)
        offset = self.font_names.pos
        if len(self.font_names) > 1:
            raise UnsupportedFont('CFF table has more than one font.')
        # Read Top Dict
        self.top_index = Index(raw, offset)
        offset = self.top_index.pos

        # Read strings
        self.strings = Strings(raw, offset)
        offset = self.strings.pos
        print (self.strings[len(cff_standard_strings):])

class Index(list):

    def __init__(self, raw, offset):
        list.__init__(self)

        count = unpack_from(b'>H', raw, offset)[0]
        offset += 2
        self.pos = offset

        if count > 0:
            self.offset_size = unpack_from(b'>B', raw, offset)[0]
            offset += 1
            if self.offset_size == 3:
                offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
                        for i in xrange(offset, 3*(count+2), 3)]
            else:
                fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
                fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
                offsets = unpack_from(fmt, raw, offset)
            offset += self.offset_size * (count+1) - 1

            for i in xrange(len(offsets)-1):
                off, noff = offsets[i:i+2]
                obj = raw[offset+off:offset+noff]
                self.append(obj)

            self.pos = offset + offsets[-1]

class Strings(Index):

    def __init__(self, raw, offset):
        super(Strings, self).__init__(raw, offset)
        for x in reversed(cff_standard_strings):
            self.insert(0, x)

class CFFTable(UnknownTable):

    def decompile(self):
        self.cff = CFF(self.raw)

# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998

cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
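For reference, a hand-worked INDEX example (offsets in a CFF INDEX are 1-based and the object data starts right after the offset array, per the CFF spec linked above); the slicing below mirrors what Index computes once its running offset has absorbed the 1-based convention:

# Hand-built CFF INDEX: count=2, offSize=1, offsets 1,3,6, data b'abcde'.
# Object i spans data[off-1:noff-1] relative to the byte after the
# offset array, giving b'ab' and b'cde'.
from struct import pack

index = pack(b'>H', 2) + pack(b'>B', 1) + bytes(bytearray([1, 3, 6])) + b'abcde'
offsets = [1, 3, 6]
data_start = 2 + 1 + len(offsets)  # count field + offSize field + offset array
for off, noff in zip(offsets, offsets[1:]):
    print(index[data_start + off - 1:data_start + noff - 1])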
11  src/calibre/utils/fonts/sfnt/cff/__init__.py  Normal file
@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
201  src/calibre/utils/fonts/sfnt/cff/dict_data.py  Normal file
@ -0,0 +1,201 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from struct import unpack

t1_operand_encoding = [None] * 256
t1_operand_encoding[0:32] = (32) * ["do_operator"]
t1_operand_encoding[32:247] = (247 - 32) * ["read_byte"]
t1_operand_encoding[247:251] = (251 - 247) * ["read_small_int1"]
t1_operand_encoding[251:255] = (255 - 251) * ["read_small_int2"]
t1_operand_encoding[255] = "read_long_int"

t2_operand_encoding = t1_operand_encoding[:]
t2_operand_encoding[28] = "read_short_int"
t2_operand_encoding[255] = "read_fixed_1616"

cff_dict_operand_encoding = t2_operand_encoding[:]
cff_dict_operand_encoding[29] = "read_long_int"
cff_dict_operand_encoding[30] = "read_real_number"
cff_dict_operand_encoding[255] = "reserved"

real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                '.', 'E', 'E-', None, '-']

class SimpleConverter(object):

    def read(self, parent, value):
        return value

    def write(self, parent, value):
        return value

class TODO(SimpleConverter):
    pass

class Reader(dict):

    def read_byte(self, b0, data, index):
        return b0 - 139, index

    def read_small_int1(self, b0, data, index):
        b1 = ord(data[index])
        return (b0-247)*256 + b1 + 108, index+1

    def read_small_int2(self, b0, data, index):
        b1 = ord(data[index])
        return -(b0-251)*256 - b1 - 108, index+1

    def read_short_int(self, b0, data, index):
        bin = data[index] + data[index+1]
        value, = unpack(b">h", bin)
        return value, index+2

    def read_long_int(self, b0, data, index):
        bin = data[index] + data[index+1] + data[index+2] + data[index+3]
        value, = unpack(b">l", bin)
        return value, index+4

    def read_fixed_1616(self, b0, data, index):
        bin = data[index] + data[index+1] + data[index+2] + data[index+3]
        value, = unpack(b">l", bin)
        return value / 65536.0, index+4

    def read_real_number(self, b0, data, index):
        number = ''
        while True:
            b = ord(data[index])
            index = index + 1
            nibble0 = (b & 0xf0) >> 4
            nibble1 = b & 0x0f
            if nibble0 == 0xf:
                break
            number = number + real_nibbles[nibble0]
            if nibble1 == 0xf:
                break
            number = number + real_nibbles[nibble1]
        return float(number), index

class Dict(Reader):

    operand_encoding = cff_dict_operand_encoding
    TABLE = []

    def __init__(self):
        Reader.__init__(self)
        table = self.TABLE[:]
        for i in xrange(len(table)):
            op, name, arg, default, conv = table[i]
            if conv is not None:
                continue
            if arg in ("delta", "array", 'number', 'SID'):
                conv = SimpleConverter()
            else:
                raise Exception('Should not happen')
            table[i] = op, name, arg, default, conv

        self.operators = {op:(name, arg) for op, name, arg, default, conv in
                table}

    def decompile(self, strings, global_subrs, data):
        self.strings = strings
        self.global_subrs = global_subrs
        self.stack = []
        index = 0
        while index < len(data):
            b0 = ord(data[index])
            index += 1
            handler = getattr(self, self.operand_encoding[b0])
            value, index = handler(b0, data, index)
            if value is not None:
                self.stack.append(value)

    def do_operator(self, b0, data, index):
        if b0 == 12:
            op = (b0, ord(data[index]))
            index += 1
        else:
            op = b0
        operator, arg_type = self.operators[op]
        self.handle_operator(operator, arg_type)
        return None, index

    def handle_operator(self, operator, arg_type):
        if isinstance(arg_type, tuple):
            value = ()
            for i in xrange(len(arg_type)-1, -1, -1):
                arg = arg_type[i]
                arghandler = getattr(self, 'arg_' + arg)
                value = (arghandler(operator),) + value
        else:
            arghandler = getattr(self, 'arg_' + arg_type)
            value = arghandler(operator)
        self[operator] = value

    def arg_number(self, name):
        return self.stack.pop()

    def arg_SID(self, name):
        return self.strings[self.stack.pop()]

    def arg_array(self, name):
        ans = self.stack[:]
        del self.stack[:]
        return ans

    def arg_delta(self, name):
        out = []
        current = 0
        for v in self.stack:
            current = current + v
            out.append(current)
        del self.stack[:]
        return out

class TopDict(Dict):

    TABLE = [
    #opcode     name                  argument type       default    converter
    ((12, 30), 'ROS', ('SID','SID','number'), None, SimpleConverter()),
    ((12, 20), 'SyntheticBase', 'number', None, None),
    (0, 'version', 'SID', None, None),
    (1, 'Notice', 'SID', None, None),
    ((12, 0), 'Copyright', 'SID', None, None),
    (2, 'FullName', 'SID', None, None),
    ((12, 38), 'FontName', 'SID', None, None),
    (3, 'FamilyName', 'SID', None, None),
    (4, 'Weight', 'SID', None, None),
    ((12, 1), 'isFixedPitch', 'number', 0, None),
    ((12, 2), 'ItalicAngle', 'number', 0, None),
    ((12, 3), 'UnderlinePosition', 'number', None, None),
    ((12, 4), 'UnderlineThickness', 'number', 50, None),
    ((12, 5), 'PaintType', 'number', 0, None),
    ((12, 6), 'CharstringType', 'number', 2, None),
    ((12, 7), 'FontMatrix', 'array', [0.001,0,0,0.001,0,0], None),
    (13, 'UniqueID', 'number', None, None),
    (5, 'FontBBox', 'array', [0,0,0,0], None),
    ((12, 8), 'StrokeWidth', 'number', 0, None),
    (14, 'XUID', 'array', None, None),
    ((12, 21), 'PostScript', 'SID', None, None),
    ((12, 22), 'BaseFontName', 'SID', None, None),
    ((12, 23), 'BaseFontBlend', 'delta', None, None),
    ((12, 31), 'CIDFontVersion', 'number', 0, None),
    ((12, 32), 'CIDFontRevision', 'number', 0, None),
    ((12, 33), 'CIDFontType', 'number', 0, None),
    ((12, 34), 'CIDCount', 'number', 8720, None),
    (15, 'charset', 'number', 0, TODO()),
    ((12, 35), 'UIDBase', 'number', None, None),
    (16, 'Encoding', 'number', 0, TODO()),
    (18, 'Private', ('number','number'), None, TODO()),
    ((12, 37), 'FDSelect', 'number', None, TODO()),
    ((12, 36), 'FDArray', 'number', None, TODO()),
    (17, 'CharStrings', 'number', None, TODO()),
    ]
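The packed-BCD real-number encoding handled by read_real_number above can be checked by hand; a small standalone Python 3 re-implementation (so it does not depend on the Py2-style str indexing used in the module):

# Standalone Python 3 version of the nibble decoding in read_real_number:
# each byte carries two nibbles; 0x0-0x9 are digits, 0xa '.', 0xb 'E',
# 0xc 'E-', 0xe '-', and 0xf terminates the number.
REAL_NIBBLES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                '.', 'E', 'E-', None, '-']

def decode_real(data):
    number = ''
    for b in data:
        for nibble in ((b & 0xf0) >> 4, b & 0x0f):
            if nibble == 0xf:
                return float(number)
            number += REAL_NIBBLES[nibble]
    raise ValueError('unterminated real number')

print(decode_real(bytes([0xe2, 0xa2, 0x5f])))  # nibbles -,2,.,2,5,end -> -2.25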
166  src/calibre/utils/fonts/sfnt/cff/table.py  Normal file
@ -0,0 +1,166 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from struct import unpack_from, unpack

from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict

# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf

class CFF(object):

    def __init__(self, raw):
        (self.major_version, self.minor_version, self.header_size,
         self.offset_size) = unpack_from(b'>4B', raw)
        if (self.major_version, self.minor_version) != (1, 0):
            raise UnsupportedFont('The CFF table has unknown version: '
                    '(%d, %d)'%(self.major_version, self.minor_version))
        offset = self.header_size

        # Read Names Index
        self.font_names = Index(raw, offset)
        offset = self.font_names.pos
        if len(self.font_names) > 1:
            raise UnsupportedFont('CFF table has more than one font.')

        # Read Top Dict
        self.top_index = Index(raw, offset)
        self.top_dict = TopDict()
        offset = self.top_index.pos

        # Read strings
        self.strings = Strings(raw, offset)
        offset = self.strings.pos

        # Read global subroutines
        self.global_subrs = GlobalSubrs(raw, offset)
        offset = self.global_subrs.pos

        # Decompile Top Dict
        self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
        import pprint
        pprint.pprint(self.top_dict)

class Index(list):

    def __init__(self, raw, offset, prepend=()):
        list.__init__(self)
        self.extend(prepend)

        count = unpack_from(b'>H', raw, offset)[0]
        offset += 2
        self.pos = offset

        if count > 0:
            self.offset_size = unpack_from(b'>B', raw, offset)[0]
            offset += 1
            if self.offset_size == 3:
                offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
                        for i in xrange(offset, 3*(count+2), 3)]
            else:
                fmt = {1:'B', 2:'H', 4:'L'}.get(self.offset_size)
                fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
                offsets = unpack_from(fmt, raw, offset)
            offset += self.offset_size * (count+1) - 1

            for i in xrange(len(offsets)-1):
                off, noff = offsets[i:i+2]
                obj = raw[offset+off:offset+noff]
                self.append(obj)

            self.pos = offset + offsets[-1]

class Strings(Index):

    def __init__(self, raw, offset):
        super(Strings, self).__init__(raw, offset, prepend=cff_standard_strings)

class GlobalSubrs(Index):
    pass

class CFFTable(UnknownTable):

    def decompile(self):
        self.cff = CFF(self.raw)

# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998

cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
@ -21,6 +21,7 @@ from calibre.utils.fonts.sfnt.maxp import MaxpTable
from calibre.utils.fonts.sfnt.loca import LocaTable
from calibre.utils.fonts.sfnt.glyf import GlyfTable
from calibre.utils.fonts.sfnt.cmap import CmapTable
from calibre.utils.fonts.sfnt.cff.table import CFFTable

# OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm

@ -42,6 +43,7 @@ class Sfnt(object):
            b'loca' : LocaTable,
            b'glyf' : GlyfTable,
            b'cmap' : CmapTable,
            b'CFF ' : CFFTable,
        }.get(table_tag, UnknownTable)(table)

    def __getitem__(self, key):

@ -53,12 +55,24 @@ class Sfnt(object):
    def __delitem__(self, key):
        del self.tables[key]

    def __iter__(self):
        '''Iterate over the table tags in optimal order as per
        http://partners.adobe.com/public/developer/opentype/index_recs.html'''
        keys = list(self.tables.keys())
        order = {x:i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
            b'hmtx', b'LTSH', b'VDMX', b'hdmx', b'cmap', b'fpgm', b'prep',
            b'cvt ', b'loca', b'glyf', b'CFF ', b'kern', b'name', b'post',
            b'gasp', b'PCLT', b'DSIG'))}
        keys.sort(key=lambda x:order.get(x, 1000))
        for x in keys:
            yield x

    def pop(self, key, default=None):
        return self.tables.pop(key, default)

    def sizes(self):
        ans = OrderedDict()
        for tag in sorted(self.tables):
        for tag in self:
            ans[tag] = len(self[tag])
        return ans

@ -82,7 +96,7 @@ class Sfnt(object):
        table_data = []
        offset = stream.tell() + ( calcsize(b'>4s3L') * num_tables )
        sizes = OrderedDict()
        for tag in sorted(self.tables):
        for tag in self:
            table = self.tables[tag]
            raw = table()
            table_len = len(raw)
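The ordering added in __iter__ above is just a sort keyed on a rank dict with a large default for unknown tags; a minimal sketch with a made-up table set:

# Sketch of the ordering used by Sfnt.__iter__: tags in the recommended
# sequence get their index as rank, anything else sorts last (rank 1000).
recommended = (b'head', b'hhea', b'maxp', b'OS/2', b'hmtx', b'cmap',
               b'loca', b'glyf', b'CFF ', b'name', b'post')
order = {x: i for i, x in enumerate(recommended)}
tags = [b'name', b'XYZ ', b'head', b'glyf']  # made-up table set
print(sorted(tags, key=lambda x: order.get(x, 1000)))
# [b'head', b'glyf', b'name', b'XYZ ']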
@ -66,6 +66,11 @@ def subset_truetype(sfnt, character_map):

# }}}

def subset_postscript(sfnt, character_map):
    cff = sfnt[b'CFF ']
    cff.decompile()
    raise Exception('TODO: Implement CFF subsetting')

def subset(raw, individual_chars, ranges=()):
    chars = list(map(ord, individual_chars))
    for r in ranges:

@ -91,6 +96,10 @@ def subset(raw, individual_chars, ranges=()):
        subset_truetype(sfnt, character_map)
    elif b'CFF ' in sfnt:
        # PostScript Outlines
        from calibre.utils.config_base import tweaks
        if tweaks['subset_cff_table']:
            subset_postscript(sfnt, character_map)
        else:
            raise UnsupportedFont('This font contains PostScript outlines, '
                    'subsetting not supported')
    else:
267  src/calibre/utils/localunzip.py  Normal file
@ -0,0 +1,267 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Try to read invalid zip files with missing or damaged central directories.
These are apparently produced in large numbers by the fruitcakes over at B&N.

Tries to only use the local headers to extract data from the damaged zip file.
'''

import os, sys, zlib, shutil
from struct import calcsize, unpack, pack
from collections import namedtuple, OrderedDict
from tempfile import SpooledTemporaryFile

HEADER_SIG = 0x04034b50
HEADER_BYTE_SIG = pack(b'<L', HEADER_SIG)
local_header_fmt = b'<L5HL2L2H'
local_header_sz = calcsize(local_header_fmt)
ZIP_STORED, ZIP_DEFLATED = 0, 8

LocalHeader = namedtuple('LocalHeader',
    'signature min_version flags compression_method mod_time mod_date '
    'crc32 compressed_size uncompressed_size filename_length extra_length '
    'filename extra')

def decode_arcname(name):
    if isinstance(name, bytes):
        from calibre.ebooks.chardet import detect
        try:
            name = name.decode('utf-8')
        except:
            res = detect(name)
            encoding = res['encoding']
            try:
                name = name.decode(encoding)
            except:
                name = name.decode('utf-8', 'replace')
    return name

def find_local_header(f):
    pos = f.tell()
    raw = f.read(50*1024)
    try:
        f.seek(pos + raw.index(HEADER_BYTE_SIG))
    except ValueError:
        f.seek(pos)
        return
    raw = f.read(local_header_sz)
    if len(raw) != local_header_sz:
        f.seek(pos)
        return
    header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
    if header.signature == HEADER_SIG:
        return header
    f.seek(pos)

def read_local_file_header(f):
    pos = f.tell()
    raw = f.read(local_header_sz)
    if len(raw) != local_header_sz:
        f.seek(pos)
        return
    header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
    if header.signature != HEADER_SIG:
        f.seek(pos)
        header = find_local_header(f)
        if header is None:
            return
    if header.min_version > 20:
        raise ValueError('This ZIP file uses unsupported features')
    if header.flags & 0b1:
        raise ValueError('This ZIP file is encrypted')
    if header.flags & (1 << 3):
        raise ValueError('This ZIP file uses data descriptors. This is unsupported')
    if header.flags & (1 << 13):
        raise ValueError('This ZIP file uses masking, unsupported.')
    if header.compression_method not in {ZIP_STORED, ZIP_DEFLATED}:
        raise ValueError('This ZIP file uses an unsupported compression method')
    fname = extra = None
    if header.filename_length > 0:
        fname = f.read(header.filename_length)
        if len(fname) != header.filename_length:
            return
        try:
            fname = fname.decode('ascii')
        except UnicodeDecodeError:
            if header.flags & (1 << 11):
                try:
                    fname = fname.decode('utf-8')
                except UnicodeDecodeError:
                    pass
        fname = decode_arcname(fname).replace('\\', '/')
    if header.extra_length > 0:
        extra = f.read(header.extra_length)
        if len(extra) != header.extra_length:
            return
    return LocalHeader(*(
        header[:-2] + (fname, extra)
        ))

def read_compressed_data(f, header):
    cdata = f.read(header.compressed_size)
    return cdata

def copy_stored_file(src, size, dest):
    read = 0
    amt = min(size, 20*1024)
    while read < size:
        raw = src.read(min(size-read, amt))
        if not raw:
            raise ValueError('Premature end of file')
        dest.write(raw)
        read += len(raw)

def copy_compressed_file(src, size, dest):
    d = zlib.decompressobj(-15)
    read = 0
    amt = min(size, 20*1024)
    while read < size:
        raw = src.read(min(size-read, amt))
        read += len(raw)
        dest.write(d.decompress(raw, 200*1024))
        count = 0
        while d.unconsumed_tail:
            count += 1
            dest.write(d.decompress(d.unconsumed_tail, 200*1024))
            if count > 100:
                raise ValueError('This ZIP file contains a ZIP bomb in %s'%
                        os.path.basename(dest.name))

def _extractall(f, path=None, file_info=None):
    found = False
    while True:
        header = read_local_file_header(f)
        if not header:
            break
        found = True
        parts = header.filename.split('/')
        if header.uncompressed_size == 0:
            # Directory
            f.seek(f.tell() + header.compressed_size)
            if path is not None:
                bdir = os.path.join(path, *parts)
                if not os.path.exists(bdir):
                    os.makedirs(bdir)
            continue

        # File
        if file_info is not None:
            file_info[header.filename] = (f.tell(), header)
        if path is not None:
            bdir = os.path.join(path, *(parts[:-1]))
            if not os.path.exists(bdir):
                os.makedirs(bdir)
            dest = os.path.join(path, *parts)
            with open(dest, 'wb') as o:
                if header.compression_method == ZIP_STORED:
                    copy_stored_file(f, header.compressed_size, o)
                else:
                    copy_compressed_file(f, header.compressed_size, o)
        else:
            f.seek(f.tell() + header.compressed_size)

    if not found:
        raise ValueError('Not a ZIP file')

def extractall(path_or_stream, path=None):
    f = path_or_stream
    close_at_end = False
    if not hasattr(f, 'read'):
        f = open(f, 'rb')
        close_at_end = True
    if path is None:
        path = os.getcwdu()
    pos = f.tell()
    try:
        _extractall(f, path)
    finally:
        f.seek(pos)
        if close_at_end:
            f.close()

class LocalZipFile(object):

    def __init__(self, stream):
        self.file_info = OrderedDict()
        _extractall(stream, file_info=self.file_info)
        self.stream = stream

    def open(self, name, spool_size=5*1024*1024):
        if isinstance(name, LocalHeader):
            name = name.filename
        try:
            offset, header = self.file_info[name]
        except KeyError:
            raise ValueError('This ZIP container has no file named: %s'%name)

        self.stream.seek(offset)
        dest = SpooledTemporaryFile(max_size=spool_size)

        if header.compression_method == ZIP_STORED:
            copy_stored_file(self.stream, header.compressed_size, dest)
        else:
            copy_compressed_file(self.stream, header.compressed_size, dest)
        dest.seek(0)
        return dest

    def getinfo(self, name):
        try:
            offset, header = self.file_info[name]
        except KeyError:
            raise ValueError('This ZIP container has no file named: %s'%name)
        return header

    def read(self, name, spool_size=5*1024*1024):
        with self.open(name, spool_size=spool_size) as f:
            return f.read()

    def extractall(self, path=None):
        self.stream.seek(0)
        _extractall(self.stream, path=(path or os.getcwdu()))

    def close(self):
        pass

    def safe_replace(self, name, datastream, extra_replacements={},
            add_missing=False):
        from calibre.utils.zipfile import ZipFile, ZipInfo
        replacements = {name:datastream}
        replacements.update(extra_replacements)
        names = frozenset(replacements.keys())
        found = set([])
        with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
            ztemp = ZipFile(temp, 'w')
            for offset, header in self.file_info.itervalues():
                if header.filename in names:
                    zi = ZipInfo(header.filename)
                    zi.compress_type = header.compression_method
                    ztemp.writestr(zi, replacements[header.filename].read())
                    found.add(header.filename)
                else:
                    ztemp.writestr(header.filename, self.read(header.filename,
                        spool_size=0))
            if add_missing:
                for name in names - found:
                    ztemp.writestr(name, replacements[name].read())
            ztemp.close()
            zipstream = self.stream
            temp.seek(0)
            zipstream.seek(0)
            zipstream.truncate()
            shutil.copyfileobj(temp, zipstream)
            zipstream.flush()

if __name__ == '__main__':
    extractall(sys.argv[-1])
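Both entry points of the new module can presumably be driven like this (the file paths are made up; the EPUB stands in for one of the malformed B&N zips the docstring mentions):

# Hypothetical usage of the new module; paths are made up.
from calibre.utils.localunzip import extractall, LocalZipFile

extractall('/tmp/broken.epub', path='/tmp/out')  # best-effort full extract

with open('/tmp/broken.epub', 'rb') as stream:
    zf = LocalZipFile(stream)
    raw = zf.read('META-INF/container.xml')  # one member, via local headers only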
@ -467,11 +467,11 @@ eject_drive_letter(WCHAR DriveLetter) {

    DeviceNumber = -1;

    hVolume = CreateFile(szVolumeAccessPath, 0,
    hVolume = CreateFileW(szVolumeAccessPath, 0,
                FILE_SHARE_READ | FILE_SHARE_WRITE,
                NULL, OPEN_EXISTING, 0, NULL);
    if (hVolume == INVALID_HANDLE_VALUE) {
        PyErr_SetString(PyExc_ValueError, "Invalid handle value for drive letter");
        PyErr_SetFromWindowsErr(0);
        return FALSE;
    }

@ -529,11 +529,17 @@ eject_drive_letter(WCHAR DriveLetter) {

static PyObject *
winutil_eject_drive(PyObject *self, PyObject *args) {
    char DriveLetter;
    char letter = '0';
    WCHAR DriveLetter = L'0';

    if (!PyArg_ParseTuple(args, "c", &DriveLetter)) return NULL;
    if (!PyArg_ParseTuple(args, "c", &letter)) return NULL;

    if (!eject_drive_letter((WCHAR)DriveLetter)) return NULL;
    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, &letter, 1, &DriveLetter, 1) == 0) {
        PyErr_SetFromWindowsErr(0);
        return NULL;
    }

    if (!eject_drive_letter(DriveLetter)) return NULL;
    Py_RETURN_NONE;
}
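On the Python side nothing changes: the function still takes a single character, now transcoded to a WCHAR internally. A hedged usage sketch (the plugins[] access path is assumed from calibre's conventions; Windows only):

# Hypothetical caller for the patched eject_drive; the plugins[] access
# path is an assumption and this only works on Windows.
from calibre.constants import plugins

winutil, err = plugins['winutil']
if err:
    raise RuntimeError(err)
winutil.eject_drive(b'E')  # single drive-letter character, as before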