mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
0.8.44
This commit is contained in:
commit
02e3c92df0
@ -19,6 +19,61 @@
|
|||||||
# new recipes:
|
# new recipes:
|
||||||
# - title:
|
# - title:
|
||||||
|
|
||||||
|
- version: 0.8.44
|
||||||
|
date: 2012-03-23
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "E-book viewer: A whole new full screen mode, with no toolbars to distract from the text and the ability to set the width of the column of text via Preferences in the ebook viewer."
|
||||||
|
type: major
|
||||||
|
tickets: [959830]
|
||||||
|
|
||||||
|
- title: "Copy to Library: If books were auto merged by the copy to library process, popup a message telling the user about it, as otherwise some people forget they have turned on auto merge and accuse calibre of losing their books."
|
||||||
|
|
||||||
|
- title: "Unix driver for Ectaco JetBook color"
|
||||||
|
tickets: [958442]
|
||||||
|
|
||||||
|
- title: "Add a link to the 'Adding Books Preferences' in the drop down menu of the Add Books button for easier access and more prominence"
|
||||||
|
tickets: [958145]
|
||||||
|
|
||||||
|
- title: "Smarten punctuation: Add a few more cases for detecting opening and closing quotes"
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Get Books: Updates to various store plugins to deal with website changes: Amazon Europe, Waterstones, Foyles, B&N, Kobo, Woblink and Empik"
|
||||||
|
|
||||||
|
- title: "Catalog generation: Do not error out when generating csv/xml catalogs if the catalog title contains filename invalid characters."
|
||||||
|
tickets: [960154]
|
||||||
|
|
||||||
|
- title: "RTF Output: Ignore corrupted images in the input document, instead of erroring out."
|
||||||
|
tickets: [959600]
|
||||||
|
|
||||||
|
- title: "E-book viewer: Try to preserve page position when the window is resized"
|
||||||
|
|
||||||
|
- title: "Fix bug that caused wrong series to be shown when clicking on the first letter of a series group in the Tag Browser"
|
||||||
|
|
||||||
|
- title: "Fix calibre not supporting different http and https proxies."
|
||||||
|
tickets: [960173]
|
||||||
|
|
||||||
|
- title: "MOBI Input: Fix regression caused by KF8 support that broke reading of ancient non-Amazon PRC files"
|
||||||
|
|
||||||
|
- title: "Fix EPUB to EPUB conversion of an EPUB with obfuscated fonts resulting in the fonts not being readable in Adobe Digital Editions"
|
||||||
|
tickets: [957527]
|
||||||
|
|
||||||
|
- title: "RTF Output: Fix bug that broke conversion to RTF when the input document contains <img> tags with no src attribute."
|
||||||
|
|
||||||
|
- title: "Fix regression in 0.8.43 that broke use of general mode templates that ended in a semi-colon."
|
||||||
|
tickets: [957295]
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- b92
|
||||||
|
- Various Polish news sources
|
||||||
|
- Le Monde
|
||||||
|
- FHM UK
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: Ivana Milakovic and Klub knjige
|
||||||
|
author: Darko Miletic
|
||||||
|
|
||||||
|
|
||||||
- version: 0.8.43
|
- version: 0.8.43
|
||||||
date: 2012-03-16
|
date: 2012-03-16
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ class Android_com_pl(BasicNewsRecipe):
|
|||||||
description = 'Android.com.pl - biggest polish Android site'
|
description = 'Android.com.pl - biggest polish Android site'
|
||||||
category = 'Android, mobile'
|
category = 'Android, mobile'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
use_embedded_content=True
|
||||||
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
b92.net
|
b92.net
|
||||||
'''
|
'''
|
||||||
@ -20,13 +20,13 @@ class B92(BasicNewsRecipe):
|
|||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
publication_type = 'newsportal'
|
publication_type = 'newsportal'
|
||||||
masthead_url = 'http://www.b92.net/images/fp/logo.gif'
|
masthead_url = 'http://b92s.net/v4/img/new-logo.png'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
|
||||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
body{font-family: Arial,Helvetica,sans1,sans-serif}
|
body{font-family: Arial,Helvetica,sans1,sans-serif}
|
||||||
.articledescription{font-family: serif1, serif}
|
|
||||||
.article-info2,.article-info1{text-transform: uppercase; font-size: small}
|
.article-info2,.article-info1{text-transform: uppercase; font-size: small}
|
||||||
|
img{display: block}
|
||||||
|
.sms{font-weight: bold}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
@ -37,11 +37,17 @@ class B92(BasicNewsRecipe):
|
|||||||
, 'linearize_tables' : True
|
, 'linearize_tables' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [
|
||||||
|
(re.compile(u'\u0110'), lambda match: u'\u00D0'),
|
||||||
|
(re.compile(r'<html.*?<body>', re.DOTALL|re.IGNORECASE), lambda match: '<html><head><title>something</title></head><body>')
|
||||||
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':['article-info1','article-text']})]
|
keep_only_tags = [dict(attrs={'class':['article-info1','article-text']})]
|
||||||
remove_attributes = ['width','height','align','hspace','vspace','border']
|
remove_attributes = ['width','height','align','hspace','vspace','border','lang','xmlns:fb']
|
||||||
remove_tags = [dict(name=['embed','link','base','meta'])]
|
remove_tags = [
|
||||||
|
dict(name=['embed','link','base','meta','iframe'])
|
||||||
|
,dict(attrs={'id':'social'})
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
|
(u'Vesti' , u'http://www.b92.net/info/rss/vesti.xml' )
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class CGM(BasicNewsRecipe):
|
class CGM(BasicNewsRecipe):
|
||||||
title = u'CGM'
|
title = u'CGM'
|
||||||
@ -17,8 +18,8 @@ class CGM(BasicNewsRecipe):
|
|||||||
remove_tags_before=dict(id='mainContent')
|
remove_tags_before=dict(id='mainContent')
|
||||||
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
|
||||||
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
|
||||||
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
|
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
|
||||||
dict(id=['movieShare', 'container'])]
|
dict(id=['movieShare', 'container'])]
|
||||||
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
|
||||||
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
|
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
|
||||||
|
|
||||||
@ -33,6 +34,8 @@ class CGM(BasicNewsRecipe):
|
|||||||
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
|
||||||
gallery.contents[1].name='img'
|
gallery.contents[1].name='img'
|
||||||
gallery.contents[1]['src']=img
|
gallery.contents[1]['src']=img
|
||||||
|
pos = len(gallery.contents)
|
||||||
|
gallery.insert(pos, BeautifulSoup('<br />'))
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
ad=soup.findAll('a')
|
ad=soup.findAll('a')
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class Elektroda(BasicNewsRecipe):
|
class Elektroda(BasicNewsRecipe):
|
||||||
title = u'Elektroda'
|
title = u'Elektroda'
|
||||||
@ -13,3 +14,18 @@ class Elektroda(BasicNewsRecipe):
|
|||||||
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
|
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
|
||||||
remove_tags=[dict(name='a', attrs={'href':'#top'})]
|
remove_tags=[dict(name='a', attrs={'href':'#top'})]
|
||||||
feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
|
feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')]
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
tag=soup.find('span', attrs={'class':'postbody'})
|
||||||
|
if tag:
|
||||||
|
pos = len(tag.contents)
|
||||||
|
tag.insert(pos, BeautifulSoup('<br />'))
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def parse_feeds (self):
|
||||||
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
for feed in feeds:
|
||||||
|
for article in feed.articles[:]:
|
||||||
|
article.title=article.title[article.title.find("::")+3:]
|
||||||
|
return feeds
|
||||||
|
@ -3,10 +3,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||||
title = u'FHM UK'
|
title = u'FHM UK'
|
||||||
description = 'Good News for Men'
|
description = 'Good News for Men'
|
||||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
|
||||||
|
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
||||||
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
# last updated 27/1/12
|
# last updated 17/3/12
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
oldest_article = 28
|
oldest_article = 28
|
||||||
max_articles_per_feed = 12
|
max_articles_per_feed = 12
|
||||||
@ -29,6 +30,8 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
feeds = [
|
feeds = [
|
||||||
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
|
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
|
||||||
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
|
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
|
||||||
(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
|
||||||
(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
#(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
||||||
]
|
#(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
||||||
|
(u'Gaming',u'http://feed43.com/6537162612465672.xml'),
|
||||||
|
]
|
||||||
|
@ -13,7 +13,7 @@ class Filmweb_pl(BasicNewsRecipe):
|
|||||||
remove_empty_feeds=True
|
remove_empty_feeds=True
|
||||||
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
|
||||||
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
|
||||||
keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
|
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
|
||||||
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
|
||||||
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
|
||||||
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
|
||||||
|
@ -9,12 +9,12 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
extra_css = 'h2 {font-style: italic; font-size:20px;}'
|
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||||
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
|
||||||
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
|
||||||
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})]
|
||||||
feeds = [(u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
|
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
|
||||||
(u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')]
|
||||||
|
|
||||||
def parse_feeds (self):
|
def parse_feeds (self):
|
||||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
@ -23,3 +23,33 @@ class Gram_pl(BasicNewsRecipe):
|
|||||||
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
|
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
|
||||||
feed.articles.remove(article)
|
feed.articles.remove(article)
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
def append_page(self, soup, appendtag):
|
||||||
|
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||||
|
while nexturl:
|
||||||
|
soup2 = self.index_to_soup('http://www.gram.pl'+ nexturl['href'])
|
||||||
|
r=appendtag.find(id='pgbox')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
pagetext = soup2.find(attrs={'class':'main'})
|
||||||
|
r=pagetext.find('h1')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
r=pagetext.find('h2')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
for r in pagetext.findAll('script'):
|
||||||
|
r.extract()
|
||||||
|
pos = len(appendtag.contents)
|
||||||
|
appendtag.insert(pos, pagetext)
|
||||||
|
nexturl = appendtag.find('a', attrs={'class':'cpn'})
|
||||||
|
r=appendtag.find(id='pgbox')
|
||||||
|
if r:
|
||||||
|
r.extract()
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
self.append_page(soup, soup.body)
|
||||||
|
tag=soup.findAll(name='div', attrs={'class':'picbox'})
|
||||||
|
for t in tag:
|
||||||
|
t['style']='float: left;'
|
||||||
|
return soup
|
Binary file not shown.
Before Width: | Height: | Size: 413 B After Width: | Height: | Size: 1.5 KiB |
43
recipes/ivanamilakovic.recipe
Normal file
43
recipes/ivanamilakovic.recipe
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
ivanamilakovic.blogspot.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class IvanaMilakovic(BasicNewsRecipe):
|
||||||
|
title = u'Ivana Milaković'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = u'Hronika mačijeg škrabala - priče, inspiracija, knjige, pisanje, prevodi...'
|
||||||
|
oldest_article = 80
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'sr'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = True
|
||||||
|
publication_type = 'blog'
|
||||||
|
extra_css = """
|
||||||
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
|
body{font-family: Arial,Tahoma,Helvetica,FreeSans,sans1,sans-serif}
|
||||||
|
img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : 'knjige, blog, srbija, sf'
|
||||||
|
, 'publisher': 'Ivana Milakovic'
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
|
feeds = [(u'Posts', u'http://ivanamilakovic.blogspot.com/feeds/posts/default')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return self.adeify_images(soup)
|
42
recipes/klubknjige.recipe
Normal file
42
recipes/klubknjige.recipe
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
klub-knjige.blogspot.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class KlubKnjige(BasicNewsRecipe):
|
||||||
|
title = 'Klub knjige'
|
||||||
|
__author__ = 'Darko Miletic'
|
||||||
|
description = 'literarni blog'
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'sr'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content = True
|
||||||
|
publication_type = 'blog'
|
||||||
|
extra_css = """
|
||||||
|
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||||
|
body{font-family: Arial,Tahoma,Helvetica,FreeSans,sans1,sans-serif}
|
||||||
|
img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
|
||||||
|
"""
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
, 'tags' : 'knjige, blog, srbija, sf'
|
||||||
|
, 'publisher': 'Klub Knjige'
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
|
feeds = [(u'Posts', u'http://klub-knjige.blogspot.com/feeds/posts/default')]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return self.adeify_images(soup)
|
@ -3,7 +3,6 @@ __copyright__ = '2011'
|
|||||||
'''
|
'''
|
||||||
lemonde.fr
|
lemonde.fr
|
||||||
'''
|
'''
|
||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
class LeMonde(BasicNewsRecipe):
|
class LeMonde(BasicNewsRecipe):
|
||||||
@ -41,77 +40,8 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
|
||||||
filterDuplicates = True
|
auto_cleanup = True
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for alink in soup.findAll('a'):
|
|
||||||
if alink.string is not None:
|
|
||||||
tstr = alink.string
|
|
||||||
alink.replaceWith(tstr)
|
|
||||||
return self.adeify_images(soup)
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r'([0-9])%'), lambda m: m.group(1) + ' %'),
|
|
||||||
(re.compile(r'([0-9])([0-9])([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + m.group(2) + m.group(3) + ' ' + m.group(4) + m.group(5) + m.group(6)),
|
|
||||||
(re.compile(r'([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + ' ' + m.group(2) + m.group(3) + m.group(4)),
|
|
||||||
(re.compile(r'<span>'), lambda match: ' <span>'),
|
|
||||||
(re.compile(r'\("'), lambda match: '(« '),
|
|
||||||
(re.compile(r'"\)'), lambda match: ' »)'),
|
|
||||||
(re.compile(r'“'), lambda match: '(« '),
|
|
||||||
(re.compile(r'”'), lambda match: ' »)'),
|
|
||||||
(re.compile(r'>\''), lambda match: '>‘'),
|
|
||||||
(re.compile(r' \''), lambda match: ' ‘'),
|
|
||||||
(re.compile(r'\''), lambda match: '’'),
|
|
||||||
(re.compile(r'"<em>'), lambda match: '<em>« '),
|
|
||||||
(re.compile(r'"<em>"</em><em>'), lambda match: '<em>« '),
|
|
||||||
(re.compile(r'"<a href='), lambda match: '« <a href='),
|
|
||||||
(re.compile(r'</em>"'), lambda match: ' »</em>'),
|
|
||||||
(re.compile(r'</a>"'), lambda match: ' »</a>'),
|
|
||||||
(re.compile(r'"</'), lambda match: ' »</'),
|
|
||||||
(re.compile(r'>"'), lambda match: '>« '),
|
|
||||||
(re.compile(r'"<'), lambda match: ' »<'),
|
|
||||||
(re.compile(r'’"'), lambda match: '’« '),
|
|
||||||
(re.compile(r' "'), lambda match: ' « '),
|
|
||||||
(re.compile(r'" '), lambda match: ' » '),
|
|
||||||
(re.compile(r'"\.'), lambda match: ' ».'),
|
|
||||||
(re.compile(r'",'), lambda match: ' »,'),
|
|
||||||
(re.compile(r'"\?'), lambda match: ' »?'),
|
|
||||||
(re.compile(r'":'), lambda match: ' »:'),
|
|
||||||
(re.compile(r'";'), lambda match: ' »;'),
|
|
||||||
(re.compile(r'"\!'), lambda match: ' »!'),
|
|
||||||
(re.compile(r' :'), lambda match: ' :'),
|
|
||||||
(re.compile(r' ;'), lambda match: ' ;'),
|
|
||||||
(re.compile(r' \?'), lambda match: ' ?'),
|
|
||||||
(re.compile(r' \!'), lambda match: ' !'),
|
|
||||||
(re.compile(r'\s»'), lambda match: ' »'),
|
|
||||||
(re.compile(r'«\s'), lambda match: '« '),
|
|
||||||
(re.compile(r' %'), lambda match: ' %'),
|
|
||||||
(re.compile(r'\.jpg » border='), lambda match: '.jpg'),
|
|
||||||
(re.compile(r'\.png » border='), lambda match: '.png'),
|
|
||||||
(re.compile(r' – '), lambda match: ' – '),
|
|
||||||
(re.compile(r' – '), lambda match: ' – '),
|
|
||||||
(re.compile(r' - '), lambda match: ' – '),
|
|
||||||
(re.compile(r' -,'), lambda match: ' –,'),
|
|
||||||
(re.compile(r'»:'), lambda match: '» :'),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class':['contenu']})
|
|
||||||
]
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':['LM_atome']})]
|
|
||||||
remove_tags_after = [dict(id='appel_temoignage')]
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
url = article.get('guid', None)
|
|
||||||
if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
|
|
||||||
url = None
|
|
||||||
return url
|
|
||||||
|
|
||||||
# def get_article_url(self, article):
|
|
||||||
# link = article.get('link')
|
|
||||||
# if 'blog' not in link and ('chat' not in link):
|
|
||||||
# return link
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('A la une', 'http://www.lemonde.fr/rss/une.xml'),
|
('A la une', 'http://www.lemonde.fr/rss/une.xml'),
|
||||||
@ -137,3 +67,10 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
|
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
url = article.get('guid', None)
|
||||||
|
if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
|
||||||
|
url = None
|
||||||
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,12 +7,12 @@ class naczytniki(BasicNewsRecipe):
|
|||||||
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
description ='everything about e-readers'
|
description ='everything about e-readers'
|
||||||
category='readers'
|
category='e-readers'
|
||||||
no_stylesheets=True
|
no_stylesheets=True
|
||||||
|
use_embedded_content=False
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
|
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
|
||||||
remove_tags_after= dict(name='div', attrs={'class':'sociable'})
|
|
||||||
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
|
||||||
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
|
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
|
||||||
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
|
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]
|
@ -17,21 +17,8 @@ class Overclock_pl(BasicNewsRecipe):
|
|||||||
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
|
remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
|
||||||
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
|
feeds = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
def append_page(self, soup, appendtag):
|
if 'articles/show' in url:
|
||||||
tag=soup.find(id='navigation')
|
return url.replace('show', 'showall')
|
||||||
if tag:
|
else:
|
||||||
nexturl=tag.findAll('option')
|
return url
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[2:]:
|
|
||||||
soup2 = self.index_to_soup(nextpage['value'])
|
|
||||||
pagetext = soup2.find(id='content')
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
rem=appendtag.find(attrs={'alt':'Pierwsza'})
|
|
||||||
if rem:
|
|
||||||
rem.parent.extract()
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
self.append_page(soup, soup.body)
|
|
||||||
return soup
|
|
@ -10,5 +10,7 @@ class palmtop_pl(BasicNewsRecipe):
|
|||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
use_embedded_content=True
|
||||||
|
#remove_tags_before=dict(name='h2')
|
||||||
|
#remove_tags_after=dict(attrs={'class':'entry clearfix'})
|
||||||
feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
|
feeds = [(u'Newsy', u'http://palmtop.pl/feed/atom/')]
|
||||||
|
@ -1,31 +1,25 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class PC_Arena(BasicNewsRecipe):
|
class PC_Arena(BasicNewsRecipe):
|
||||||
title = u'PCArena'
|
title = u'PCArena'
|
||||||
oldest_article = 18300
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = 'fenuks'
|
__author__ = 'fenuks'
|
||||||
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
|
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
masthead_url='http://pcarena.pl/public/design/frontend/images/logo.gif'
|
masthead_url='http://pcarena.pl/pcarena/img/logo.png'
|
||||||
cover_url= 'http://pcarena.pl/public/design/frontend/images/logo.gif'
|
cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
|
remove_empty_feeds=True
|
||||||
remove_tags=[dict(attrs={'class':'pages'})]
|
#keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
|
||||||
feeds = [(u'Newsy', u'http://pcarena.pl/misc/rss/news'), (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles')]
|
#remove_tags=[dict(attrs={'class':'pages'})]
|
||||||
|
feeds = [(u'Aktualności', u'http://pcarena.pl/aktualnosci/feeds.rss'), (u'Testy', u'http://pcarena.pl/testy/feeds.rss'), (u'Software', u'http://pcarena.pl/oprogramowanie/feeds.rss'), (u'Poradniki', u'http://pcarena.pl/poradniki/feeds.rss'), (u'Mobile', u'http://pcarena.pl/mobile/feeds.rss')]
|
||||||
|
|
||||||
def append_page(self, soup, appendtag):
|
def print_version(self, url):
|
||||||
tag=soup.find(name='div', attrs={'class':'pagNum'})
|
return url.replace('show', 'print')
|
||||||
if tag:
|
|
||||||
nexturl=tag.findAll('a')
|
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[1:]:
|
|
||||||
nextpage= 'http://pcarena.pl' + nextpage['href']
|
|
||||||
soup2 = self.index_to_soup(nextpage)
|
|
||||||
pagetext = soup2.find(attrs={'class':'artBody'})
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def image_url_processor(self, baseurl, url):
|
||||||
self.append_page(soup, soup.body)
|
if 'http' not in url:
|
||||||
return soup
|
return 'http://pcarena.pl' + url
|
||||||
|
else:
|
||||||
|
return url
|
@ -10,32 +10,11 @@ class PC_Centre(BasicNewsRecipe):
|
|||||||
masthead_url= 'http://pccentre.pl/views/images/logo.gif'
|
masthead_url= 'http://pccentre.pl/views/images/logo.gif'
|
||||||
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
cover_url= 'http://pccentre.pl/views/images/logo.gif'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags= [dict(id='content')]
|
remove_empty_feeds = True
|
||||||
remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
#keep_only_tags= [dict(id='content')]
|
||||||
feeds = [(u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n§ion=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n§ion=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n§ion=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n§ion=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n§ion=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n§ion=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n§ion=9')]
|
#remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
|
||||||
|
remove_tags=[dict(attrs={'class':'logo_print'})]
|
||||||
|
feeds = [(u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n§ion=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n§ion=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n§ion=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n§ion=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n§ion=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n§ion=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n§ion=9')]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
def append_page(self, soup, appendtag):
|
return url.replace('show', 'print')
|
||||||
tag=soup.find(name='div', attrs={'class':'pages'})
|
|
||||||
if tag:
|
|
||||||
nexturl=tag.findAll('a')
|
|
||||||
tag.extract()
|
|
||||||
for nextpage in nexturl[:-1]:
|
|
||||||
nextpage= 'http://pccentre.pl' + nextpage['href']
|
|
||||||
soup2 = self.index_to_soup(nextpage)
|
|
||||||
pagetext = soup2.find(id='content')
|
|
||||||
rem=pagetext.findAll(attrs={'class':['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']})
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
rem=pagetext.findAll(id='comments')
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
rem=pagetext.findAll('h1')
|
|
||||||
for r in rem:
|
|
||||||
r.extract()
|
|
||||||
pos = len(appendtag.contents)
|
|
||||||
appendtag.insert(pos, pagetext)
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
self.append_page(soup, soup.body)
|
|
||||||
return soup
|
|
@ -8,10 +8,11 @@ class Tablety_pl(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
|
||||||
category = 'IT'
|
category = 'IT'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
use_embedded_content=True
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
|
||||||
remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
#remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
|
||||||
remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
|
#remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'})
|
||||||
remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]
|
#remove_tags=[dict(name='footer', attrs={'class':'entry-footer clearfix'}), dict(name='div', attrs={'class':'entry-comment-counter'})]
|
||||||
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
||||||
title = u'WNP'
|
title = u'WNP'
|
||||||
@ -8,10 +8,11 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
|||||||
description = u'Wirtualny Nowy Przemysł'
|
description = u'Wirtualny Nowy Przemysł'
|
||||||
category = 'economy'
|
category = 'economy'
|
||||||
language = 'pl'
|
language = 'pl'
|
||||||
|
preprocess_regexps = [(re.compile(ur'Czytaj też:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Czytaj więcej:.*?</a>', re.DOTALL), lambda match: '')]
|
||||||
oldest_article = 8
|
oldest_article = 8
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets= True
|
no_stylesheets= True
|
||||||
keep_only_tags = dict(name='div', attrs={'id':'contentText'})
|
remove_tags=[dict(attrs={'class':'printF'})]
|
||||||
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
|
||||||
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
(u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
|
||||||
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
(u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
|
||||||
@ -19,3 +20,7 @@ class AdvancedUserRecipe1312886443(BasicNewsRecipe):
|
|||||||
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
(u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
|
||||||
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
(u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
|
||||||
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
(u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]
|
||||||
|
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return 'http://wnp.pl/drukuj/' +url[url.find(',')+1:]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
|
|||||||
from setup.installer.windows.wix import WixMixIn
|
from setup.installer.windows.wix import WixMixIn
|
||||||
|
|
||||||
OPENSSL_DIR = r'Q:\openssl'
|
OPENSSL_DIR = r'Q:\openssl'
|
||||||
QT_DIR = 'Q:\\Qt\\4.7.3'
|
QT_DIR = 'Q:\\Qt\\4.8.0'
|
||||||
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
|
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
|
||||||
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
||||||
SW = r'C:\cygwin\home\kovid\sw'
|
SW = r'C:\cygwin\home\kovid\sw'
|
||||||
|
@ -97,7 +97,9 @@ Now, run configure and make::
|
|||||||
|
|
||||||
-no-plugin-manifests is needed so that loading the plugins does not fail looking for the CRT assembly
|
-no-plugin-manifests is needed so that loading the plugins does not fail looking for the CRT assembly
|
||||||
|
|
||||||
configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -no-plugin-manifests -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
|
configure -opensource -release -qt-zlib -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -no-plugin-manifests -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
|
||||||
|
|
||||||
|
Add the path to the bin folder inside the Qt dir to your system PATH.
|
||||||
|
|
||||||
SIP
|
SIP
|
||||||
-----
|
-----
|
||||||
|
@ -10,19 +10,19 @@ msgstr ""
|
|||||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||||
"devel@lists.alioth.debian.org>\n"
|
"devel@lists.alioth.debian.org>\n"
|
||||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||||
"PO-Revision-Date: 2011-09-27 16:03+0000\n"
|
"PO-Revision-Date: 2012-03-18 12:56+0000\n"
|
||||||
"Last-Translator: Kovid Goyal <Unknown>\n"
|
"Last-Translator: Vibhav Pant <vibhavp@gmail.com>\n"
|
||||||
"Language-Team: Hindi\n"
|
"Language-Team: Hindi\n"
|
||||||
"MIME-Version: 1.0\n"
|
"MIME-Version: 1.0\n"
|
||||||
"Content-Type: text/plain; charset=UTF-8\n"
|
"Content-Type: text/plain; charset=UTF-8\n"
|
||||||
"Content-Transfer-Encoding: 8bit\n"
|
"Content-Transfer-Encoding: 8bit\n"
|
||||||
"X-Launchpad-Export-Date: 2011-11-26 05:19+0000\n"
|
"X-Launchpad-Export-Date: 2012-03-19 04:40+0000\n"
|
||||||
"X-Generator: Launchpad (build 14381)\n"
|
"X-Generator: Launchpad (build 14969)\n"
|
||||||
"Language: \n"
|
"Language: \n"
|
||||||
|
|
||||||
#. name for aaa
|
#. name for aaa
|
||||||
msgid "Ghotuo"
|
msgid "Ghotuo"
|
||||||
msgstr ""
|
msgstr "घोटुओ"
|
||||||
|
|
||||||
#. name for aab
|
#. name for aab
|
||||||
msgid "Alumu-Tesu"
|
msgid "Alumu-Tesu"
|
||||||
@ -30,7 +30,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for aac
|
#. name for aac
|
||||||
msgid "Ari"
|
msgid "Ari"
|
||||||
msgstr ""
|
msgstr "अरी"
|
||||||
|
|
||||||
#. name for aad
|
#. name for aad
|
||||||
msgid "Amal"
|
msgid "Amal"
|
||||||
@ -58,11 +58,11 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for aak
|
#. name for aak
|
||||||
msgid "Ankave"
|
msgid "Ankave"
|
||||||
msgstr ""
|
msgstr "अनकावे"
|
||||||
|
|
||||||
#. name for aal
|
#. name for aal
|
||||||
msgid "Afade"
|
msgid "Afade"
|
||||||
msgstr ""
|
msgstr "अफ़ाडे"
|
||||||
|
|
||||||
#. name for aam
|
#. name for aam
|
||||||
msgid "Aramanik"
|
msgid "Aramanik"
|
||||||
@ -74,7 +74,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for aao
|
#. name for aao
|
||||||
msgid "Arabic; Algerian Saharan"
|
msgid "Arabic; Algerian Saharan"
|
||||||
msgstr ""
|
msgstr "अरबी भाषा; अल्जीरियाई सहारा"
|
||||||
|
|
||||||
#. name for aap
|
#. name for aap
|
||||||
msgid "Arára; Pará"
|
msgid "Arára; Pará"
|
||||||
@ -94,11 +94,11 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for aat
|
#. name for aat
|
||||||
msgid "Albanian; Arvanitika"
|
msgid "Albanian; Arvanitika"
|
||||||
msgstr ""
|
msgstr "अल्बानियन भाषा; अरवनितिका"
|
||||||
|
|
||||||
#. name for aau
|
#. name for aau
|
||||||
msgid "Abau"
|
msgid "Abau"
|
||||||
msgstr ""
|
msgstr "अबाऊ"
|
||||||
|
|
||||||
#. name for aaw
|
#. name for aaw
|
||||||
msgid "Solong"
|
msgid "Solong"
|
||||||
@ -110,7 +110,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for aaz
|
#. name for aaz
|
||||||
msgid "Amarasi"
|
msgid "Amarasi"
|
||||||
msgstr ""
|
msgstr "अमारासि"
|
||||||
|
|
||||||
#. name for aba
|
#. name for aba
|
||||||
msgid "Abé"
|
msgid "Abé"
|
||||||
@ -142,7 +142,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for abh
|
#. name for abh
|
||||||
msgid "Arabic; Tajiki"
|
msgid "Arabic; Tajiki"
|
||||||
msgstr ""
|
msgstr "अरबी; ताजिकि"
|
||||||
|
|
||||||
#. name for abi
|
#. name for abi
|
||||||
msgid "Abidji"
|
msgid "Abidji"
|
||||||
@ -150,7 +150,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for abj
|
#. name for abj
|
||||||
msgid "Aka-Bea"
|
msgid "Aka-Bea"
|
||||||
msgstr ""
|
msgstr "अका-बीआ"
|
||||||
|
|
||||||
#. name for abk
|
#. name for abk
|
||||||
msgid "Abkhazian"
|
msgid "Abkhazian"
|
||||||
@ -166,11 +166,11 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for abn
|
#. name for abn
|
||||||
msgid "Abua"
|
msgid "Abua"
|
||||||
msgstr ""
|
msgstr "अबुआ"
|
||||||
|
|
||||||
#. name for abo
|
#. name for abo
|
||||||
msgid "Abon"
|
msgid "Abon"
|
||||||
msgstr ""
|
msgstr "अबोन"
|
||||||
|
|
||||||
#. name for abp
|
#. name for abp
|
||||||
msgid "Ayta; Abellen"
|
msgid "Ayta; Abellen"
|
||||||
@ -178,7 +178,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for abq
|
#. name for abq
|
||||||
msgid "Abaza"
|
msgid "Abaza"
|
||||||
msgstr ""
|
msgstr "अबाज़ा"
|
||||||
|
|
||||||
#. name for abr
|
#. name for abr
|
||||||
msgid "Abron"
|
msgid "Abron"
|
||||||
@ -186,7 +186,7 @@ msgstr ""
|
|||||||
|
|
||||||
#. name for abs
|
#. name for abs
|
||||||
msgid "Malay; Ambonese"
|
msgid "Malay; Ambonese"
|
||||||
msgstr ""
|
msgstr "मलय; अम्बोनीसी"
|
||||||
|
|
||||||
#. name for abt
|
#. name for abt
|
||||||
msgid "Ambulas"
|
msgid "Ambulas"
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -381,12 +381,15 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
|||||||
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
|
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
|
||||||
opener.addheaders = [('User-agent', user_agent)]
|
opener.addheaders = [('User-agent', user_agent)]
|
||||||
proxies = get_proxies()
|
proxies = get_proxies()
|
||||||
|
to_add = {}
|
||||||
http_proxy = proxies.get('http', None)
|
http_proxy = proxies.get('http', None)
|
||||||
if http_proxy:
|
if http_proxy:
|
||||||
opener.set_proxies({'http':http_proxy})
|
to_add['http'] = http_proxy
|
||||||
https_proxy = proxies.get('https', None)
|
https_proxy = proxies.get('https', None)
|
||||||
if https_proxy:
|
if https_proxy:
|
||||||
opener.set_proxies({'https':https_proxy})
|
to_add['https'] = https_proxy
|
||||||
|
if to_add:
|
||||||
|
opener.set_proxies(to_add)
|
||||||
|
|
||||||
return opener
|
return opener
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
__appname__ = u'calibre'
|
__appname__ = u'calibre'
|
||||||
numeric_version = (0, 8, 43)
|
numeric_version = (0, 8, 44)
|
||||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
|
|
||||||
|
@ -625,7 +625,8 @@ from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
|
|||||||
POCKETBOOK701, POCKETBOOK360P, PI2)
|
POCKETBOOK701, POCKETBOOK360P, PI2)
|
||||||
from calibre.devices.iliad.driver import ILIAD
|
from calibre.devices.iliad.driver import ILIAD
|
||||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
from calibre.devices.jetbook.driver import (JETBOOK, MIBUK, JETBOOK_MINI,
|
||||||
|
JETBOOK_COLOR)
|
||||||
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
||||||
KINDLE_FIRE)
|
KINDLE_FIRE)
|
||||||
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||||
@ -664,9 +665,7 @@ plugins += [
|
|||||||
ILIAD,
|
ILIAD,
|
||||||
IREXDR1000,
|
IREXDR1000,
|
||||||
IREXDR800,
|
IREXDR800,
|
||||||
JETBOOK,
|
JETBOOK, JETBOOK_MINI, MIBUK, JETBOOK_COLOR,
|
||||||
JETBOOK_MINI,
|
|
||||||
MIBUK,
|
|
||||||
SHINEBOOK,
|
SHINEBOOK,
|
||||||
POCKETBOOK360, POCKETBOOK301, POCKETBOOK602, POCKETBOOK701, POCKETBOOK360P,
|
POCKETBOOK360, POCKETBOOK301, POCKETBOOK602, POCKETBOOK701, POCKETBOOK360P,
|
||||||
PI2,
|
PI2,
|
||||||
@ -1539,6 +1538,7 @@ class StoreWaterstonesUKStore(StoreBase):
|
|||||||
|
|
||||||
headquarters = 'UK'
|
headquarters = 'UK'
|
||||||
formats = ['EPUB', 'PDF']
|
formats = ['EPUB', 'PDF']
|
||||||
|
affiliate = True
|
||||||
|
|
||||||
class StoreWeightlessBooksStore(StoreBase):
|
class StoreWeightlessBooksStore(StoreBase):
|
||||||
name = 'Weightless Books'
|
name = 'Weightless Books'
|
||||||
@ -1558,15 +1558,6 @@ class StoreWHSmithUKStore(StoreBase):
|
|||||||
headquarters = 'UK'
|
headquarters = 'UK'
|
||||||
formats = ['EPUB', 'PDF']
|
formats = ['EPUB', 'PDF']
|
||||||
|
|
||||||
class StoreWizardsTowerBooksStore(StoreBase):
|
|
||||||
name = 'Wizards Tower Books'
|
|
||||||
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
|
|
||||||
actual_plugin = 'calibre.gui2.store.stores.wizards_tower_books_plugin:WizardsTowerBooksStore'
|
|
||||||
|
|
||||||
drm_free_only = True
|
|
||||||
headquarters = 'UK'
|
|
||||||
formats = ['EPUB', 'MOBI']
|
|
||||||
|
|
||||||
class StoreWoblinkStore(StoreBase):
|
class StoreWoblinkStore(StoreBase):
|
||||||
name = 'Woblink'
|
name = 'Woblink'
|
||||||
author = u'Tomasz Długosz'
|
author = u'Tomasz Długosz'
|
||||||
@ -1574,7 +1565,7 @@ class StoreWoblinkStore(StoreBase):
|
|||||||
actual_plugin = 'calibre.gui2.store.stores.woblink_plugin:WoblinkStore'
|
actual_plugin = 'calibre.gui2.store.stores.woblink_plugin:WoblinkStore'
|
||||||
|
|
||||||
headquarters = 'PL'
|
headquarters = 'PL'
|
||||||
formats = ['EPUB', 'PDF', 'WOBLINK']
|
formats = ['EPUB', 'MOBI', 'PDF', 'WOBLINK']
|
||||||
|
|
||||||
class XinXiiStore(StoreBase):
|
class XinXiiStore(StoreBase):
|
||||||
name = 'XinXii'
|
name = 'XinXii'
|
||||||
@ -1637,7 +1628,6 @@ plugins += [
|
|||||||
StoreWaterstonesUKStore,
|
StoreWaterstonesUKStore,
|
||||||
StoreWeightlessBooksStore,
|
StoreWeightlessBooksStore,
|
||||||
StoreWHSmithUKStore,
|
StoreWHSmithUKStore,
|
||||||
StoreWizardsTowerBooksStore,
|
|
||||||
StoreWoblinkStore,
|
StoreWoblinkStore,
|
||||||
XinXiiStore,
|
XinXiiStore,
|
||||||
StoreZixoStore
|
StoreZixoStore
|
||||||
|
@ -234,7 +234,7 @@ def main(args=sys.argv):
|
|||||||
sql_dump = args[-1]
|
sql_dump = args[-1]
|
||||||
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
|
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
|
||||||
elif opts.inspect_mobi:
|
elif opts.inspect_mobi:
|
||||||
from calibre.ebooks.mobi.debug import inspect_mobi
|
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
||||||
for path in args[1:]:
|
for path in args[1:]:
|
||||||
prints('Inspecting:', path)
|
prints('Inspecting:', path)
|
||||||
inspect_mobi(path)
|
inspect_mobi(path)
|
||||||
|
@ -125,4 +125,29 @@ class JETBOOK_MINI(USBMS):
|
|||||||
|
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
class JETBOOK_COLOR(USBMS):
|
||||||
|
|
||||||
|
'''
|
||||||
|
set([(u'0x951',
|
||||||
|
u'0x160b',
|
||||||
|
u'0x0',
|
||||||
|
u'Freescale',
|
||||||
|
u'Mass Storage Device',
|
||||||
|
u'0802270905553')])
|
||||||
|
'''
|
||||||
|
|
||||||
|
FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'rtf', 'txt', 'pdf', 'djvu']
|
||||||
|
|
||||||
|
gui_name = 'JetBook Color'
|
||||||
|
name = 'JetBook Color Device Interface'
|
||||||
|
description = _('Communicate with the JetBook Color reader.')
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
|
||||||
|
VENDOR_ID = [0x951]
|
||||||
|
PRODUCT_ID = [0x160b]
|
||||||
|
BCD = [0x0]
|
||||||
|
EBOOK_DIR_MAIN = 'My Books'
|
||||||
|
|
||||||
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class PRS505(USBMS):
|
|||||||
booklist_class = CollectionsBookList
|
booklist_class = CollectionsBookList
|
||||||
|
|
||||||
|
|
||||||
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
|
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt', 'zbf']
|
||||||
CAN_SET_METADATA = ['title', 'authors', 'collections']
|
CAN_SET_METADATA = ['title', 'authors', 'collections']
|
||||||
CAN_DO_DEVICE_DB_PLUGBOARD = True
|
CAN_DO_DEVICE_DB_PLUGBOARD = True
|
||||||
|
|
||||||
|
@ -190,12 +190,22 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
|
||||||
uuid = unicode(x).split(':')[-1]
|
uuid = unicode(x).split(':')[-1]
|
||||||
break
|
break
|
||||||
|
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
|
||||||
|
|
||||||
if uuid is None:
|
if uuid is None:
|
||||||
self.log.warn('No UUID identifier found')
|
self.log.warn('No UUID identifier found')
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
uuid = str(uuid4())
|
uuid = str(uuid4())
|
||||||
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
|
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
|
||||||
|
|
||||||
|
if encrypted_fonts and not uuid.startswith('urn:uuid:'):
|
||||||
|
# Apparently ADE requires this value to start with urn:uuid:
|
||||||
|
# for some absurd reason, or it will throw a hissy fit and refuse
|
||||||
|
# to use the obfuscated fonts.
|
||||||
|
for x in identifiers:
|
||||||
|
if unicode(x) == uuid:
|
||||||
|
x.content = 'urn:uuid:'+uuid
|
||||||
|
|
||||||
with TemporaryDirectory(u'_epub_output') as tdir:
|
with TemporaryDirectory(u'_epub_output') as tdir:
|
||||||
from calibre.customize.ui import plugin_for_output_format
|
from calibre.customize.ui import plugin_for_output_format
|
||||||
metadata_xml = None
|
metadata_xml = None
|
||||||
@ -210,7 +220,6 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
|
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
|
||||||
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
|
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
|
||||||
if x.endswith('.ncx')][0])
|
if x.endswith('.ncx')][0])
|
||||||
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
|
|
||||||
encryption = None
|
encryption = None
|
||||||
if encrypted_fonts:
|
if encrypted_fonts:
|
||||||
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
|
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
|
||||||
|
@ -59,7 +59,10 @@ class MOBIInput(InputFormatPlugin):
|
|||||||
if mr.kf8_type is not None:
|
if mr.kf8_type is not None:
|
||||||
log('Found KF8 MOBI of type %r'%mr.kf8_type)
|
log('Found KF8 MOBI of type %r'%mr.kf8_type)
|
||||||
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
|
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
|
||||||
return os.path.abspath(Mobi8Reader(mr, log)())
|
mr = Mobi8Reader(mr, log)
|
||||||
|
opf = os.path.abspath(mr())
|
||||||
|
self.encrypted_fonts = mr.encrypted_fonts
|
||||||
|
return opf
|
||||||
|
|
||||||
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
|
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
|
||||||
if raw:
|
if raw:
|
||||||
|
@ -179,7 +179,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
writer(oeb, output_path)
|
writer(oeb, output_path)
|
||||||
|
|
||||||
if opts.extract_to is not None:
|
if opts.extract_to is not None:
|
||||||
from calibre.ebooks.mobi.debug import inspect_mobi
|
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
||||||
ddir = opts.extract_to
|
ddir = opts.extract_to
|
||||||
inspect_mobi(output_path, ddir=ddir)
|
inspect_mobi(output_path, ddir=ddir)
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
16
src/calibre/ebooks/mobi/debug/__init__.py
Normal file
16
src/calibre/ebooks/mobi/debug/__init__.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
def format_bytes(byts):
|
||||||
|
byts = bytearray(byts)
|
||||||
|
byts = [hex(b)[2:] for b in byts]
|
||||||
|
return ' '.join(byts)
|
||||||
|
|
||||||
|
|
535
src/calibre/ebooks/mobi/debug/headers.py
Normal file
535
src/calibre/ebooks/mobi/debug/headers.py
Normal file
@ -0,0 +1,535 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import struct, datetime, os
|
||||||
|
|
||||||
|
from calibre.utils.date import utc_tz
|
||||||
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
|
from calibre.ebooks.mobi.debug import format_bytes
|
||||||
|
from calibre.ebooks.mobi.utils import get_trailing_data
|
||||||
|
|
||||||
|
# PalmDB {{{
|
||||||
|
class PalmDOCAttributes(object):
|
||||||
|
|
||||||
|
class Attr(object):
|
||||||
|
|
||||||
|
def __init__(self, name, field, val):
|
||||||
|
self.name = name
|
||||||
|
self.val = val & field
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return '%s: %s'%(self.name, bool(self.val))
|
||||||
|
|
||||||
|
def __init__(self, raw):
|
||||||
|
self.val = struct.unpack(b'<H', raw)[0]
|
||||||
|
self.attributes = []
|
||||||
|
for name, field in [('Read Only', 0x02), ('Dirty AppInfoArea', 0x04),
|
||||||
|
('Backup this database', 0x08),
|
||||||
|
('Okay to install newer over existing copy, if present on PalmPilot', 0x10),
|
||||||
|
('Force the PalmPilot to reset after this database is installed', 0x12),
|
||||||
|
('Don\'t allow copy of file to be beamed to other Pilot',
|
||||||
|
0x14)]:
|
||||||
|
self.attributes.append(PalmDOCAttributes.Attr(name, field,
|
||||||
|
self.val))
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
attrs = '\n\t'.join([str(x) for x in self.attributes])
|
||||||
|
return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
|
||||||
|
|
||||||
|
class PalmDB(object):
|
||||||
|
|
||||||
|
def __init__(self, raw):
|
||||||
|
self.raw = raw
|
||||||
|
|
||||||
|
if self.raw.startswith(b'TPZ'):
|
||||||
|
raise ValueError('This is a Topaz file')
|
||||||
|
|
||||||
|
self.name = self.raw[:32].replace(b'\x00', b'')
|
||||||
|
self.attributes = PalmDOCAttributes(self.raw[32:34])
|
||||||
|
self.version = struct.unpack(b'>H', self.raw[34:36])[0]
|
||||||
|
|
||||||
|
palm_epoch = datetime.datetime(1904, 1, 1, tzinfo=utc_tz)
|
||||||
|
self.creation_date_raw = struct.unpack(b'>I', self.raw[36:40])[0]
|
||||||
|
self.creation_date = (palm_epoch +
|
||||||
|
datetime.timedelta(seconds=self.creation_date_raw))
|
||||||
|
self.modification_date_raw = struct.unpack(b'>I', self.raw[40:44])[0]
|
||||||
|
self.modification_date = (palm_epoch +
|
||||||
|
datetime.timedelta(seconds=self.modification_date_raw))
|
||||||
|
self.last_backup_date_raw = struct.unpack(b'>I', self.raw[44:48])[0]
|
||||||
|
self.last_backup_date = (palm_epoch +
|
||||||
|
datetime.timedelta(seconds=self.last_backup_date_raw))
|
||||||
|
self.modification_number = struct.unpack(b'>I', self.raw[48:52])[0]
|
||||||
|
self.app_info_id = self.raw[52:56]
|
||||||
|
self.sort_info_id = self.raw[56:60]
|
||||||
|
self.type = self.raw[60:64]
|
||||||
|
self.creator = self.raw[64:68]
|
||||||
|
self.ident = self.type + self.creator
|
||||||
|
if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
|
||||||
|
raise ValueError('Unknown book ident: %r'%self.ident)
|
||||||
|
self.last_record_uid, = struct.unpack(b'>I', self.raw[68:72])
|
||||||
|
self.next_rec_list_id = self.raw[72:76]
|
||||||
|
|
||||||
|
self.number_of_records, = struct.unpack(b'>H', self.raw[76:78])
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
ans = ['*'*20 + ' PalmDB Header '+ '*'*20]
|
||||||
|
ans.append('Name: %r'%self.name)
|
||||||
|
ans.append(str(self.attributes))
|
||||||
|
ans.append('Version: %s'%self.version)
|
||||||
|
ans.append('Creation date: %s (%s)'%(self.creation_date.isoformat(),
|
||||||
|
self.creation_date_raw))
|
||||||
|
ans.append('Modification date: %s (%s)'%(self.modification_date.isoformat(),
|
||||||
|
self.modification_date_raw))
|
||||||
|
ans.append('Backup date: %s (%s)'%(self.last_backup_date.isoformat(),
|
||||||
|
self.last_backup_date_raw))
|
||||||
|
ans.append('Modification number: %s'%self.modification_number)
|
||||||
|
ans.append('App Info ID: %r'%self.app_info_id)
|
||||||
|
ans.append('Sort Info ID: %r'%self.sort_info_id)
|
||||||
|
ans.append('Type: %r'%self.type)
|
||||||
|
ans.append('Creator: %r'%self.creator)
|
||||||
|
ans.append('Last record UID +1: %r'%self.last_record_uid)
|
||||||
|
ans.append('Next record list id: %r'%self.next_rec_list_id)
|
||||||
|
ans.append('Number of records: %s'%self.number_of_records)
|
||||||
|
|
||||||
|
return '\n'.join(ans)
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
class Record(object): # {{{
|
||||||
|
|
||||||
|
def __init__(self, raw, header):
|
||||||
|
self.offset, self.flags, self.uid = header
|
||||||
|
self.raw = raw
|
||||||
|
|
||||||
|
@property
|
||||||
|
def header(self):
|
||||||
|
return 'Offset: %d Flags: %d UID: %d First 4 bytes: %r Size: %d'%(self.offset, self.flags,
|
||||||
|
self.uid, self.raw[:4], len(self.raw))
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# EXTH {{{
|
||||||
|
class EXTHRecord(object):
|
||||||
|
|
||||||
|
def __init__(self, type_, data):
|
||||||
|
self.type = type_
|
||||||
|
self.data = data
|
||||||
|
self.name = {
|
||||||
|
1 : 'DRM Server id',
|
||||||
|
2 : 'DRM Commerce id',
|
||||||
|
3 : 'DRM ebookbase book id',
|
||||||
|
100 : 'author',
|
||||||
|
101 : 'publisher',
|
||||||
|
102 : 'imprint',
|
||||||
|
103 : 'description',
|
||||||
|
104 : 'isbn',
|
||||||
|
105 : 'subject',
|
||||||
|
106 : 'publishingdate',
|
||||||
|
107 : 'review',
|
||||||
|
108 : 'contributor',
|
||||||
|
109 : 'rights',
|
||||||
|
110 : 'subjectcode',
|
||||||
|
111 : 'type',
|
||||||
|
112 : 'source',
|
||||||
|
113 : 'asin',
|
||||||
|
114 : 'versionnumber',
|
||||||
|
115 : 'sample',
|
||||||
|
116 : 'startreading',
|
||||||
|
117 : 'adult',
|
||||||
|
118 : 'retailprice',
|
||||||
|
119 : 'retailpricecurrency',
|
||||||
|
121 : 'KF8 header section index',
|
||||||
|
125 : 'KF8 resources (images/fonts) count',
|
||||||
|
129 : 'KF8 cover URI',
|
||||||
|
131 : 'KF8 unknown count',
|
||||||
|
201 : 'coveroffset',
|
||||||
|
202 : 'thumboffset',
|
||||||
|
203 : 'hasfakecover',
|
||||||
|
204 : 'Creator Software',
|
||||||
|
205 : 'Creator Major Version', # '>I'
|
||||||
|
206 : 'Creator Minor Version', # '>I'
|
||||||
|
207 : 'Creator Build Number', # '>I'
|
||||||
|
208 : 'watermark',
|
||||||
|
209 : 'tamper_proof_keys',
|
||||||
|
300 : 'fontsignature',
|
||||||
|
301 : 'clippinglimit', # percentage '>B'
|
||||||
|
402 : 'publisherlimit',
|
||||||
|
404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
|
||||||
|
501 : 'cdetype', # 4 chars (PDOC or EBOK)
|
||||||
|
502 : 'lastupdatetime',
|
||||||
|
503 : 'updatedtitle',
|
||||||
|
}.get(self.type, repr(self.type))
|
||||||
|
|
||||||
|
if (self.name in {'coveroffset', 'thumboffset', 'hasfakecover',
|
||||||
|
'Creator Major Version', 'Creator Minor Version',
|
||||||
|
'Creator Build Number', 'Creator Software', 'startreading'} or
|
||||||
|
self.type in {121, 125, 131}):
|
||||||
|
self.data, = struct.unpack(b'>I', self.data)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return '%s (%d): %r'%(self.name, self.type, self.data)
|
||||||
|
|
||||||
|
class EXTHHeader(object):

    '''Parsed EXTH metadata section from a MOBI record 0.

    Exposes the records both as a sorted list (``records``) and as a
    type -> record map (``rmap``) for direct lookup.
    '''

    def __init__(self, raw):
        self.raw = raw
        if not self.raw.startswith(b'EXTH'):
            raise ValueError('EXTH header does not start with EXTH')
        # Layout: 4 byte magic, 4 byte total length, 4 byte record count
        self.length, = struct.unpack(b'>I', self.raw[4:8])
        self.count, = struct.unpack(b'>I', self.raw[8:12])

        self.records = []
        offset = 12
        for _ in xrange(self.count):
            offset = self.read_record(offset)
        self.records.sort(key=lambda rec: rec.type)
        # type -> record map for O(1) lookups by EXTH type number
        self.rmap = {rec.type: rec for rec in self.records}

    def __getitem__(self, type_):
        # Raises KeyError if no record of this type exists
        return self.rmap.__getitem__(type_).data

    def get(self, type_, default=None):
        '''Return the data of the record with EXTH type ``type_`` or ``default``.'''
        rec = self.rmap.get(type_, default)
        return getattr(rec, 'data', default)

    def read_record(self, pos):
        '''Parse one EXTH record starting at ``pos``; return the next offset.'''
        type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
        payload = self.raw[(pos+8):(pos+length)]
        self.records.append(EXTHRecord(type_, payload))
        return pos + length

    @property
    def kf8_header_index(self):
        # EXTH record 121 holds the record number of the KF8 MOBI header
        # in joint MOBI6/KF8 files; None when absent
        return self.get(121, None)

    def __str__(self):
        ans = ['*'*20 + ' EXTH Header '+ '*'*20]
        ans.append('EXTH header length: %d'%self.length)
        ans.append('Number of EXTH records: %d'%self.count)
        ans.append('EXTH records...')
        for r in self.records:
            ans.append(str(r))
        return '\n'.join(ans)
# }}}
|
||||||
|
|
||||||
|
class MOBIHeader(object): # {{{

    '''Parsed view of the MOBI header stored in a PDB record.

    ``record0`` is the record containing the header (record 0 for MOBI6, or
    the KF8 header record in joint files); ``offset`` is that record's index
    and is added to all record-number fields so they are absolute.
    All multi-byte fields are big-endian.
    '''

    def __init__(self, record0, offset):
        self.raw = record0.raw
        self.header_offset = offset

        # PalmDOC header (first 16 bytes of the record)
        self.compression_raw = self.raw[:2]
        self.compression = {1: 'No compression', 2: 'PalmDoc compression',
            17480: 'HUFF/CDIC compression'}.get(struct.unpack(b'>H',
                self.compression_raw)[0],
                repr(self.compression_raw))
        self.unused = self.raw[2:4]
        self.text_length, = struct.unpack(b'>I', self.raw[4:8])
        self.number_of_text_records, self.text_record_size = \
                struct.unpack(b'>HH', self.raw[8:12])
        self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
        self.encryption_type = {
                0: 'No encryption',
                1: 'Old mobipocket encryption',
                2: 'Mobipocket encryption'
            }.get(self.encryption_type_raw, repr(self.encryption_type_raw))
        self.unknown = self.raw[14:16]

        # MOBI header proper starts at byte 16 with the b'MOBI' magic
        self.identifier = self.raw[16:20]
        if self.identifier != b'MOBI':
            raise ValueError('Identifier %r unknown'%self.identifier)

        self.length, = struct.unpack(b'>I', self.raw[20:24])
        self.type_raw, = struct.unpack(b'>I', self.raw[24:28])
        self.type = {
                2 : 'Mobipocket book',
                3 : 'PalmDOC book',
                4 : 'Audio',
                257 : 'News',
                258 : 'News Feed',
                259 : 'News magazine',
                513 : 'PICS',
                514 : 'Word',
                515 : 'XLS',
                516 : 'PPT',
                517 : 'TEXT',
                518 : 'HTML',
            }.get(self.type_raw, repr(self.type_raw))

        self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
        self.encoding = {
                1252 : 'cp1252',
                65001: 'utf-8',
            }.get(self.encoding_raw, repr(self.encoding_raw))
        self.uid = self.raw[32:36]
        self.file_version, = struct.unpack(b'>I', self.raw[36:40])
        self.meta_orth_indx, self.meta_infl_indx = struct.unpack(
                b'>II', self.raw[40:48])
        self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52])
        self.reserved = self.raw[52:80]
        self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84])
        self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88])
        self.fullname_length, = struct.unpack(b'>I', self.raw[88:92])
        self.locale_raw, = struct.unpack(b'>I', self.raw[92:96])
        langcode = self.locale_raw
        # Low byte is the language id, bits 10-17 the sub-language id
        langid    = langcode & 0xFF
        sublangid = (langcode >> 10) & 0xFF
        self.language = main_language.get(langid, 'ENGLISH')
        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')

        self.input_language = self.raw[96:100]
        # NOTE: attribute name misspelled ('langauage') in the original;
        # preserved because __str__ and possibly外 external code read it
        self.output_langauage = self.raw[100:104]
        self.min_version, = struct.unpack(b'>I', self.raw[104:108])
        self.first_image_index, = struct.unpack(b'>I', self.raw[108:112])
        self.huffman_record_offset, = struct.unpack(b'>I', self.raw[112:116])
        self.huffman_record_count, = struct.unpack(b'>I', self.raw[116:120])
        self.datp_record_offset, = struct.unpack(b'>I', self.raw[120:124])
        self.datp_record_count, = struct.unpack(b'>I', self.raw[124:128])
        self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
        # Bit 6 of the EXTH flags signals the presence of an EXTH section
        self.has_exth = bool(self.exth_flags & 0x40)
        self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
        if self.has_drm_data:
            self.unknown3 = self.raw[132:164]
            self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
            self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
            self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
            self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])
        self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
        self.has_fcis_flis = False
        self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
        self.extra_data_flags = 0
        if self.has_extra_data_flags:
            self.unknown4 = self.raw[180:192]
            self.fdst_idx, self.fdst_count = struct.unpack_from(b'>II',
                    self.raw, 192)
            (self.fcis_number, self.fcis_count, self.flis_number,
                    self.flis_count) = struct.unpack(b'>IIII',
                            self.raw[200:216])
            self.unknown6 = self.raw[216:224]
            self.srcs_record_index = struct.unpack(b'>I',
                self.raw[224:228])[0]
            self.num_srcs_records = struct.unpack(b'>I',
                self.raw[228:232])[0]
            self.unknown7 = self.raw[232:240]
            self.extra_data_flags = struct.unpack(b'>I',
                self.raw[240:244])[0]
            # Trailing-data flag bits: bit0 multibyte overlap, bit1 indexing
            # bytes, bit2 uncrossable breaks
            self.has_multibytes = bool(self.extra_data_flags & 0b1)
            self.has_indexing_bytes = bool(self.extra_data_flags & 0b10)
            self.has_uncrossable_breaks = bool(self.extra_data_flags & 0b100)
            self.primary_index_record, = struct.unpack(b'>I',
                    self.raw[244:248])

        if self.file_version >= 8:
            # KF8-only index fields
            (self.sect_idx, self.skel_idx, self.datp_idx, self.oth_idx
                    ) = struct.unpack_from(b'>4L', self.raw, 248)
            self.unknown9 = self.raw[264:self.length]
            if self.meta_orth_indx != self.sect_idx:
                raise ValueError('KF8 header has different Meta orth and '
                        'section indices')

        # The following are all relative to the position of the header record
        # make them absolute for ease of debugging
        for x in ('sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
                'meta_orth_indx', 'huffman_record_offset',
                'first_non_book_record', 'datp_record_offset', 'fcis_number',
                'flis_number', 'primary_index_record', 'fdst_idx',
                'first_image_index'):
            if hasattr(self, x):
                setattr(self, x, self.header_offset+getattr(self, x))

        if self.has_exth:
            # EXTH section begins right after the MOBI header
            self.exth_offset = 16 + self.length

            self.exth = EXTHHeader(self.raw[self.exth_offset:])

            self.end_of_exth = self.exth_offset + self.exth.length
            self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset]

    def __str__(self):
        '''Human readable dump of every parsed header field.'''
        ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
        a = ans.append
        # Helper for fields that use NULL_INDEX as the "not present" value
        i = lambda d, x : a('%s (null value: %d): %d'%(d, NULL_INDEX, x))
        ans.append('Compression: %s'%self.compression)
        ans.append('Unused: %r'%self.unused)
        ans.append('Number of text records: %d'%self.number_of_text_records)
        ans.append('Text record size: %d'%self.text_record_size)
        ans.append('Encryption: %s'%self.encryption_type)
        ans.append('Unknown: %r'%self.unknown)
        ans.append('Identifier: %r'%self.identifier)
        ans.append('Header length: %d'% self.length)
        ans.append('Type: %s'%self.type)
        ans.append('Encoding: %s'%self.encoding)
        ans.append('UID: %r'%self.uid)
        ans.append('File version: %d'%self.file_version)
        i('Meta Orth Index (Sections index in KF8)', self.meta_orth_indx)
        i('Meta Infl Index', self.meta_infl_indx)
        ans.append('Secondary index record: %d (null val: %d)'%(
            self.secondary_index_record, NULL_INDEX))
        ans.append('Reserved: %r'%self.reserved)
        ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
            self.first_non_book_record))
        ans.append('Full name offset: %d'%self.fullname_offset)
        ans.append('Full name length: %d bytes'%self.fullname_length)
        ans.append('Langcode: %r'%self.locale_raw)
        ans.append('Language: %s'%self.language)
        ans.append('Sub language: %s'%self.sublanguage)
        ans.append('Input language: %r'%self.input_language)
        ans.append('Output language: %r'%self.output_langauage)
        ans.append('Min version: %d'%self.min_version)
        ans.append('First Image index: %d'%self.first_image_index)
        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
        ans.append('Huffman record count: %d'%self.huffman_record_count)
        ans.append('DATP record offset: %r'%self.datp_record_offset)
        ans.append('DATP record count: %r'%self.datp_record_count)
        ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
        if self.has_drm_data:
            ans.append('Unknown3: %r'%self.unknown3)
            ans.append('DRM Offset: %s'%self.drm_offset)
            ans.append('DRM Count: %s'%self.drm_count)
            ans.append('DRM Size: %s'%self.drm_size)
            ans.append('DRM Flags: %r'%self.drm_flags)
        if self.has_extra_data_flags:
            ans.append('Unknown4: %r'%self.unknown4)
            ans.append('FDST Index: %d'% self.fdst_idx)
            ans.append('FDST Count: %d'% self.fdst_count)
            ans.append('FCIS number: %d'% self.fcis_number)
            ans.append('FCIS count: %d'% self.fcis_count)
            ans.append('FLIS number: %d'% self.flis_number)
            ans.append('FLIS count: %d'% self.flis_count)
            ans.append('Unknown6: %r'% self.unknown6)
            ans.append('SRCS record index: %d'%self.srcs_record_index)
            ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
            ans.append('Unknown7: %r'%self.unknown7)
            ans.append(('Extra data flags: %s (has multibyte: %s) '
                '(has indexing: %s) (has uncrossable breaks: %s)')%(
                    bin(self.extra_data_flags), self.has_multibytes,
                    self.has_indexing_bytes, self.has_uncrossable_breaks ))
            ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
                self.primary_index_record))
        if self.file_version >= 8:
            i('Sections Index', self.sect_idx)
            i('SKEL Index', self.skel_idx)
            i('DATP Index', self.datp_idx)
            i('Other Index', self.oth_idx)
            if self.unknown9:
                a('Unknown9: %r'%self.unknown9)

        ans = '\n'.join(ans)

        if self.has_exth:
            ans += '\n\n' + str(self.exth)
            ans += '\n\nBytes after EXTH (%d bytes): %s'%(
                len(self.bytes_after_exth),
                format_bytes(self.bytes_after_exth))

        ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
            self.fullname_length))

        ans += '\nRecord 0 length: %d'%len(self.raw)
        return ans
# }}}
|
||||||
|
|
||||||
|
class MOBIFile(object):

    '''In-memory representation of a whole MOBI file.

    Reads the entire stream, parses the PalmDB header and all record
    headers, splits the raw bytes into records and parses the MOBI
    header(s). Also selects the correct text-decompression function(s)
    based on the declared compression scheme.
    '''

    def __init__(self, stream):
        self.raw = stream.read()
        # First 78 bytes are the PalmDB (PDB) header
        self.palmdb = PalmDB(self.raw[:78])

        self.record_headers = []
        self.records = []
        for i in xrange(self.palmdb.number_of_records):
            # Each record header is 8 bytes: 4 byte offset, 1 byte flags,
            # 3 byte unique id
            pos = 78 + i * 8
            offset, a1, a2, a3, a4 = struct.unpack(b'>LBBBB', self.raw[pos:pos+8])
            flags, val = a1, a2 << 16 | a3 << 8 | a4
            self.record_headers.append((offset, flags, val))

        def section(section_number):
            # Raw bytes of a record: from its offset to the next record's
            # offset (or end of file for the last record)
            if section_number == self.palmdb.number_of_records - 1:
                end_off = len(self.raw)
            else:
                end_off = self.record_headers[section_number + 1][0]
            off = self.record_headers[section_number][0]
            return self.raw[off:end_off]

        for i in range(self.palmdb.number_of_records):
            self.records.append(Record(section(i), self.record_headers[i]))

        self.mobi_header = MOBIHeader(self.records[0], 0)
        self.huffman_record_nums = []

        # kf8_type: None (plain MOBI6), 'standalone' (pure KF8) or 'joint'
        # (MOBI6 + KF8 in the same container)
        self.kf8_type = None
        mh = mh8 = self.mobi_header
        if mh.file_version >= 8:
            self.kf8_type = 'standalone'
        elif mh.has_exth and mh.exth.kf8_header_index is not None:
            self.kf8_type = 'joint'
            kf8i = mh.exth.kf8_header_index
            mh8 = MOBIHeader(self.records[kf8i], kf8i)
        self.mobi8_header = mh8

        if 'huff' in self.mobi_header.compression.lower():
            from calibre.ebooks.mobi.huffcdic import HuffReader

            def huffit(off, cnt):
                # Build a HUFF/CDIC decompressor from the given record range
                huffman_record_nums = list(xrange(off, off+cnt))
                huffrecs = [self.records[r].raw for r in huffman_record_nums]
                huffs = HuffReader(huffrecs)
                return huffman_record_nums, huffs.unpack

            if self.kf8_type == 'joint':
                # Joint files carry separate huffman tables for each format
                recs6, d6 = huffit(mh.huffman_record_offset,
                        mh.huffman_record_count)
                recs8, d8 = huffit(mh8.huffman_record_offset,
                        mh8.huffman_record_count)
                self.huffman_record_nums = recs6 + recs8
            else:
                self.huffman_record_nums, d6 = huffit(mh.huffman_record_offset,
                        mh.huffman_record_count)
                d8 = d6
        elif 'palmdoc' in self.mobi_header.compression.lower():
            from calibre.ebooks.compression.palmdoc import decompress_doc
            d8 = d6 = decompress_doc
        else:
            # No compression: identity transform
            d8 = d6 = lambda x: x

        self.decompress6, self.decompress8 = d6, d8
|
||||||
|
|
||||||
|
class TextRecord(object): # {{{

    '''A single decompressed text record together with its trailing data
    entries, decoded into a human readable mapping.'''

    def __init__(self, idx, record, extra_data_flags, decompress):
        self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
        raw_trailing_bytes = record.raw[len(self.raw):]
        self.raw = decompress(self.raw)

        # Rename the known numeric trailing-entry keys to descriptive names,
        # in the same fixed order as before
        for code, label in ((0, 'multibyte_overlap'), (1, 'indexing'),
                (2, 'uncrossable_breaks')):
            if code in self.trailing_data:
                self.trailing_data[label] = self.trailing_data.pop(code)
        self.trailing_data['raw_bytes'] = raw_trailing_bytes

        # Anything still keyed by an integer was not recognised above
        for typ, val in self.trailing_data.iteritems():
            if isinstance(typ, int):
                print ('Record %d has unknown trailing data of type: %d : %r'%
                        (idx, typ, val))

        self.idx = idx

    def dump(self, folder):
        '''Write this record's text and trailing data as files in folder.'''
        stem = '%06d'%self.idx
        with open(os.path.join(folder, stem+'.txt'), 'wb') as f:
            f.write(self.raw)
        with open(os.path.join(folder, stem+'.trailing_data'), 'wb') as f:
            for k, v in self.trailing_data.iteritems():
                raw = '%s : %r\n\n'%(k, v)
                f.write(raw.encode('utf-8'))

    # }}}
|
||||||
|
|
||||||
|
|
48
src/calibre/ebooks/mobi/debug/main.py
Normal file
48
src/calibre/ebooks/mobi/debug/main.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, os, shutil
|
||||||
|
|
||||||
|
from calibre.ebooks.mobi.debug.headers import MOBIFile
|
||||||
|
from calibre.ebooks.mobi.debug.mobi6 import inspect_mobi as inspect_mobi6
|
||||||
|
from calibre.ebooks.mobi.debug.mobi8 import inspect_mobi as inspect_mobi8
|
||||||
|
|
||||||
|
def inspect_mobi(path_or_stream, ddir=None): # {{{
    '''Dump debug information about a MOBI file into a directory.

    :param path_or_stream: path to a MOBI file, or an already open binary
        stream (anything with a ``read`` method)
    :param ddir: output directory; defaults to ``decompiled_<basename>``.
        It is deleted and recreated on every run.

    Bug fix: when given a path, the file object that was opened here was
    never closed; it is now closed in a finally block. A caller supplied
    stream is left open, since it belongs to the caller.
    '''
    opened_here = not hasattr(path_or_stream, 'read')
    stream = open(path_or_stream, 'rb') if opened_here else path_or_stream
    try:
        f = MOBIFile(stream)
        if ddir is None:
            ddir = 'decompiled_' + os.path.splitext(os.path.basename(stream.name))[0]
        try:
            # Best effort: start from a clean output directory
            shutil.rmtree(ddir)
        except OSError:
            pass
        os.makedirs(ddir)
        if f.kf8_type is None:
            inspect_mobi6(f, ddir)
        elif f.kf8_type == 'joint':
            # Joint files get separate mobi6/ and mobi8/ sub-directories
            p6 = os.path.join(ddir, 'mobi6')
            os.mkdir(p6)
            inspect_mobi6(f, p6)
            p8 = os.path.join(ddir, 'mobi8')
            os.mkdir(p8)
            inspect_mobi8(f, p8)
        else:
            inspect_mobi8(f, ddir)

        print ('Debug data saved to:', ddir)
    finally:
        if opened_here:
            stream.close()

# }}}
|
||||||
|
|
||||||
|
def main():
    # Command line entry point: expects the path of the MOBI file to
    # inspect as the first argument.
    inspect_mobi(sys.argv[1])

if __name__ == '__main__':
    main()
|
||||||
|
|
839
src/calibre/ebooks/mobi/debug/mobi6.py
Normal file
839
src/calibre/ebooks/mobi/debug/mobi6.py
Normal file
@ -0,0 +1,839 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import struct, sys, os
|
||||||
|
from collections import OrderedDict, defaultdict
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
|
from calibre.ebooks.mobi.reader.index import (parse_index_record,
|
||||||
|
parse_tagx_section)
|
||||||
|
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
||||||
|
decode_tbs, read_font_record)
|
||||||
|
from calibre.utils.magick.draw import identify_data
|
||||||
|
from calibre.ebooks.mobi.debug import format_bytes
|
||||||
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
|
|
||||||
|
|
||||||
|
class TagX(object): # {{{

    '''A single entry from the TAGX section of an index header.'''

    def __init__(self, tag, num_values, bitmask, eof):
        self.tag = tag
        self.num_values = num_values
        self.bitmask = bitmask
        self.eof = eof
        # Duplicate alias of num_values — presumably some callers use this
        # name instead; TODO confirm and consolidate
        self.num_of_values = num_values
        # The end-of-block marker is the all-zero entry with eof == 1
        self.is_eof = (self.eof == 1 and self.tag == 0 and self.num_values == 0
                and self.bitmask == 0)

    def __repr__(self):
        return 'TAGX(tag=%02d, num_values=%d, bitmask=%r, eof=%d)' % (self.tag,
                self.num_values, bin(self.bitmask), self.eof)
    # }}}
|
||||||
|
|
||||||
|
class SecondaryIndexHeader(object): # {{{

    '''Parsed INDX header of a secondary index record.

    Validates the INDX/TAGX/IDXT structure and extracts the header fields.
    Raises ValueError on any structural inconsistency.
    '''

    def __init__(self, record):
        self.record = record
        raw = self.record.raw
        #open('/t/index_header.bin', 'wb').write(raw)
        if raw[:4] != b'INDX':
            raise ValueError('Invalid Secondary Index Record')
        self.header_length, = struct.unpack('>I', raw[4:8])
        self.unknown1 = raw[8:16]
        self.index_type, = struct.unpack('>I', raw[16:20])
        self.index_type_desc = {0: 'normal', 2:
                'inflection', 6: 'calibre'}.get(self.index_type, 'unknown')
        self.idxt_start, = struct.unpack('>I', raw[20:24])
        self.index_count, = struct.unpack('>I', raw[24:28])
        self.index_encoding_num, = struct.unpack('>I', raw[28:32])
        self.index_encoding = {65001: 'utf-8', 1252:
                'cp1252'}.get(self.index_encoding_num, 'unknown')
        if self.index_encoding == 'unknown':
            raise ValueError(
                'Unknown index encoding: %d'%self.index_encoding_num)
        self.unknown2 = raw[32:36]
        self.num_index_entries, = struct.unpack('>I', raw[36:40])
        self.ordt_start, = struct.unpack('>I', raw[40:44])
        self.ligt_start, = struct.unpack('>I', raw[44:48])
        self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
        self.num_of_cncx_blocks, = struct.unpack('>I', raw[52:56])
        self.unknown3 = raw[56:180]
        self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
        if self.tagx_offset != self.header_length:
            raise ValueError('TAGX offset and header length disagree')
        self.unknown4 = raw[184:self.header_length]

        # TAGX section immediately follows the INDX header
        tagx = raw[self.header_length:]
        if not tagx.startswith(b'TAGX'):
            raise ValueError('Invalid TAGX section')
        self.tagx_header_length, = struct.unpack('>I', tagx[4:8])
        self.tagx_control_byte_count, = struct.unpack('>I', tagx[8:12])
        self.tagx_entries = [TagX(*x) for x in parse_tagx_section(tagx)[1]]
        if self.tagx_entries and not self.tagx_entries[-1].is_eof:
            raise ValueError('TAGX last entry is not EOF')

        # The idxt0 block: a length-prefixed name of the last entry,
        # followed by a 2 byte NCX count
        idxt0_pos = self.header_length+self.tagx_header_length
        num = ord(raw[idxt0_pos])
        count_pos = idxt0_pos+1+num
        self.last_entry = raw[idxt0_pos+1:count_pos]
        self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])

        # There may be some alignment zero bytes between the end of the idxt0
        # and self.idxt_start
        idxt = raw[self.idxt_start:]
        if idxt[:4] != b'IDXT':
            raise ValueError('Invalid IDXT header')
        length_check, = struct.unpack(b'>H', idxt[4:6])
        if length_check != self.header_length + self.tagx_header_length:
            raise ValueError('Length check failed')
        if idxt[6:].replace(b'\0', b''):
            raise ValueError('Non null trailing bytes after IDXT')


    def __str__(self):
        '''Human readable dump of all parsed fields.'''
        ans = ['*'*20 + ' Secondary Index Header '+ '*'*20]
        a = ans.append
        def u(w):
            # Render an unknown byte blob along with whether it is all zeros
            a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
                len(w), not bool(w.replace(b'\0', b'')) ))

        a('Header length: %d'%self.header_length)
        u(self.unknown1)
        a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
        a('Offset to IDXT start: %d'%self.idxt_start)
        a('Number of index records: %d'%self.index_count)
        a('Index encoding: %s (%d)'%(self.index_encoding,
            self.index_encoding_num))
        u(self.unknown2)
        a('Number of index entries: %d'% self.num_index_entries)
        a('ORDT start: %d'%self.ordt_start)
        a('LIGT start: %d'%self.ligt_start)
        a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
        a('Number of cncx blocks: %d'%self.num_of_cncx_blocks)
        u(self.unknown3)
        a('TAGX offset: %d'%self.tagx_offset)
        u(self.unknown4)
        a('\n\n')
        a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
        a('Header length: %d'%self.tagx_header_length)
        a('Control byte count: %d'%self.tagx_control_byte_count)
        for i in self.tagx_entries:
            a('\t' + repr(i))
        a('Index of last IndexEntry in secondary index record: %s'% self.last_entry)
        a('Number of entries in the NCX: %d'% self.ncx_count)

        return '\n'.join(ans)

# }}}
|
||||||
|
|
||||||
|
class IndexHeader(object): # {{{

    '''Parsed INDX header of the primary index record.

    Like SecondaryIndexHeader but with a header_type field, a possible
    language field, and a hex-encoded last-entry number that must equal
    ncx_count - 1. Raises ValueError on any structural inconsistency.
    '''

    def __init__(self, record):
        self.record = record
        raw = self.record.raw
        #open('/t/index_header.bin', 'wb').write(raw)
        if raw[:4] != b'INDX':
            raise ValueError('Invalid Primary Index Record')

        self.header_length, = struct.unpack('>I', raw[4:8])
        self.unknown1 = raw[8:12]
        self.header_type, = struct.unpack('>I', raw[12:16])
        self.index_type, = struct.unpack('>I', raw[16:20])
        self.index_type_desc = {0: 'normal', 2:
                'inflection', 6: 'calibre'}.get(self.index_type, 'unknown')
        self.idxt_start, = struct.unpack('>I', raw[20:24])
        self.index_count, = struct.unpack('>I', raw[24:28])
        self.index_encoding_num, = struct.unpack('>I', raw[28:32])
        self.index_encoding = {65001: 'utf-8', 1252:
                'cp1252'}.get(self.index_encoding_num, 'unknown')
        if self.index_encoding == 'unknown':
            raise ValueError(
                'Unknown index encoding: %d'%self.index_encoding_num)
        self.possibly_language = raw[32:36]
        self.num_index_entries, = struct.unpack('>I', raw[36:40])
        self.ordt_start, = struct.unpack('>I', raw[40:44])
        self.ligt_start, = struct.unpack('>I', raw[44:48])
        self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
        self.num_of_cncx_blocks, = struct.unpack('>I', raw[52:56])
        self.unknown2 = raw[56:180]
        self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
        if self.tagx_offset != self.header_length:
            raise ValueError('TAGX offset and header length disagree')
        self.unknown3 = raw[184:self.header_length]

        # TAGX section immediately follows the INDX header
        tagx = raw[self.header_length:]
        if not tagx.startswith(b'TAGX'):
            raise ValueError('Invalid TAGX section')
        self.tagx_header_length, = struct.unpack('>I', tagx[4:8])
        self.tagx_control_byte_count, = struct.unpack('>I', tagx[8:12])
        self.tagx_entries = [TagX(*x) for x in parse_tagx_section(tagx)[1]]
        if self.tagx_entries and not self.tagx_entries[-1].is_eof:
            raise ValueError('TAGX last entry is not EOF')

        # Last entry id is stored as a hex encoded number, followed by a
        # 2 byte NCX count
        idxt0_pos = self.header_length+self.tagx_header_length
        last_num, consumed = decode_hex_number(raw[idxt0_pos:])
        count_pos = idxt0_pos + consumed
        self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])
        self.last_entry = last_num

        if last_num != self.ncx_count - 1:
            raise ValueError('Last id number in the NCX != NCX count - 1')
        # There may be some alignment zero bytes between the end of the idxt0
        # and self.idxt_start

        idxt = raw[self.idxt_start:]
        if idxt[:4] != b'IDXT':
            raise ValueError('Invalid IDXT header')
        length_check, = struct.unpack(b'>H', idxt[4:6])
        if length_check != self.header_length + self.tagx_header_length:
            raise ValueError('Length check failed')
        if idxt[6:].replace(b'\0', b''):
            raise ValueError('Non null trailing bytes after IDXT')


    def __str__(self):
        '''Human readable dump of all parsed fields.'''
        ans = ['*'*20 + ' Index Header (%d bytes)'%len(self.record.raw)+ '*'*20]
        a = ans.append
        def u(w):
            # Render an unknown byte blob along with whether it is all zeros
            a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
                len(w), not bool(w.replace(b'\0', b'')) ))

        a('Header length: %d'%self.header_length)
        u(self.unknown1)
        a('Header type: %d'%self.header_type)
        a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
        a('Offset to IDXT start: %d'%self.idxt_start)
        a('Number of index records: %d'%self.index_count)
        a('Index encoding: %s (%d)'%(self.index_encoding,
            self.index_encoding_num))
        a('Unknown (possibly language?): %r'%(self.possibly_language))
        a('Number of index entries: %d'% self.num_index_entries)
        a('ORDT start: %d'%self.ordt_start)
        a('LIGT start: %d'%self.ligt_start)
        a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
        a('Number of cncx blocks: %d'%self.num_of_cncx_blocks)
        u(self.unknown2)
        a('TAGX offset: %d'%self.tagx_offset)
        u(self.unknown3)
        a('\n\n')
        a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
        a('Header length: %d'%self.tagx_header_length)
        a('Control byte count: %d'%self.tagx_control_byte_count)
        for i in self.tagx_entries:
            a('\t' + repr(i))
        a('Index of last IndexEntry in primary index record: %s'% self.last_entry)
        a('Number of entries in the NCX: %d'% self.ncx_count)

        return '\n'.join(ans)
    # }}}
|
||||||
|
|
||||||
|
class Tag(object): # {{{

    '''
    Index entries are a collection of tags. Each tag is represented by this
    class.
    '''

    # tag number -> (attribute name, human readable description)
    TAG_MAP = {
            1: ('offset', 'Offset in HTML'),
            2: ('size', 'Size in HTML'),
            3: ('label_offset', 'Label offset in CNCX'),
            4: ('depth', 'Depth of this entry in TOC'),
            5: ('class_offset', 'Class offset in CNCX'),
            6: ('pos_fid', 'File Index'),

            11: ('secondary', '[unknown, unknown, '
                'tag type from TAGX in primary index header]'),

            21: ('parent_index', 'Parent'),
            22: ('first_child_index', 'First child'),
            23: ('last_child_index', 'Last child'),

            69 : ('image_index', 'Offset from first image record to the'
                                ' image record associated with this entry'
                                ' (masthead for periodical or thumbnail for'
                                ' article entry).'),
            70 : ('desc_offset', 'Description offset in cncx'),
            71 : ('author_offset', 'Author offset in cncx'),
            72 : ('image_caption_offset', 'Image caption offset in cncx'),
            73 : ('image_attr_offset', 'Image attribution offset in cncx'),

    }

    def __init__(self, tag_type, vals, cncx):
        '''
        :param tag_type: numeric tag id (a key of TAG_MAP, else unknown)
        :param vals: list of decoded values for this tag
        :param cncx: mapping used to resolve *_offset tags to their strings
        '''
        # Multiple values keep the list, a single value is unwrapped, and
        # no values at all becomes None
        self.value = vals if len(vals) > 1 else vals[0] if vals else None

        self.cncx_value = None
        if tag_type in self.TAG_MAP:
            self.attr, self.desc = self.TAG_MAP[tag_type]
        else:
            # Bug fix: the format string used '%%s' which escapes to a
            # literal %s, leaving no conversion specifier, so applying %
            # raised TypeError ('not all arguments converted') instead of
            # printing the unknown tag number
            print ('Unknown tag value: %s'%tag_type)
            self.desc = '??Unknown (tag value: %d)'%tag_type
            self.attr = 'unknown'

        # Offset-valued tags are resolved through the CNCX string table
        if '_offset' in self.attr:
            self.cncx_value = cncx[self.value]

    def __str__(self):
        if self.cncx_value is not None:
            return '%s : %r [%r]'%(self.desc, self.value, self.cncx_value)
        return '%s : %r'%(self.desc, self.value)

# }}}
|
||||||
|
|
||||||
|
class IndexEntry(object): # {{{

    '''
    The index is made up of entries, each of which is represented by an
    instance of this class. Index entries typically point to offsets in the
    HTML, specify HTML sizes and point to text strings in the CNCX that are
    used in the navigation UI.
    '''

    def __init__(self, ident, entry, cncx):
        '''
        :param ident: entry identifier; hex strings are converted to int
        :param entry: mapping of tag type -> list of decoded values
        :param cncx: CNCX string table used to resolve *_offset tags
        '''
        try:
            self.index = int(ident, 16)
        except ValueError:
            self.index = ident
        self.tags = [Tag(tag_type, vals, cncx) for tag_type, vals in
                entry.iteritems()]

    def _tag_value(self, attr, default):
        # Value of the first tag whose attr matches, or default if absent.
        # Refactoring: the seven properties below previously each repeated
        # this linear scan inline; behavior is unchanged.
        for tag in self.tags:
            if tag.attr == attr:
                return tag.value
        return default

    @property
    def label(self):
        # The label is a CNCX string, so return the resolved cncx_value
        for tag in self.tags:
            if tag.attr == 'label_offset':
                return tag.cncx_value
        return ''

    @property
    def offset(self):
        return self._tag_value('offset', 0)

    @property
    def size(self):
        return self._tag_value('size', 0)

    @property
    def depth(self):
        return self._tag_value('depth', 0)

    @property
    def parent_index(self):
        return self._tag_value('parent_index', -1)

    @property
    def first_child_index(self):
        return self._tag_value('first_child_index', -1)

    @property
    def last_child_index(self):
        return self._tag_value('last_child_index', -1)

    @property
    def pos_fid(self):
        return self._tag_value('pos_fid', [0, 0])

    def __str__(self):
        ans = ['Index Entry(index=%s, length=%d)'%(
            self.index, len(self.tags))]
        for tag in self.tags:
            if tag.value is not None:
                ans.append('\t'+str(tag))
        if self.first_child_index != -1:
            ans.append('\tNumber of children: %d'%(self.last_child_index -
                self.first_child_index + 1))
        return '\n'.join(ans)

# }}}
|
||||||
|
|
||||||
|
class IndexRecord(object): # {{{

    '''
    Represents all indexing information in the MOBI, apart from indexing info
    in the trailing data of the text records.
    '''

    def __init__(self, records, index_header, cncx):
        # Filled in later (by inspect_mobi) with the concatenated HTML so
        # that __str__ can show text context around each entry's offset.
        self.alltext = None
        table = OrderedDict()
        tags = [TagX(x.tag, x.num_values, x.bitmask, x.eof) for x in
                index_header.tagx_entries]
        for record in records:
            raw = record.raw
            if raw[:4] != b'INDX':
                raise ValueError('Invalid Primary Index Record')
            parse_index_record(table, record.raw,
                    index_header.tagx_control_byte_count, tags,
                    index_header.index_encoding, strict=True)
        self.indices = [IndexEntry(ident, entry, cncx) for ident, entry in
                table.iteritems()]

    def get_parent(self, index):
        # Entries at depth 0 are top level and have no parent.
        if index.depth < 1:
            return None
        parent_depth = index.depth - 1
        # NOTE(review): this loop filters candidates at the parent depth
        # but never returns one, so the method always falls through to
        # None for depth >= 1 entries. Looks unfinished — confirm intent
        # before relying on this method.
        for p in self.indices:
            if p.depth != parent_depth:
                continue

    def __str__(self):
        ans = ['*'*20 + ' Index Entries (%d entries) '%len(self.indices)+ '*'*20]
        add = ans.append
        def u(w): # helper for displaying unidentified byte runs
            add('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
                len(w), not bool(w.replace(b'\0', b'')) ))
        text = self.alltext
        for entry in self.indices:
            offset = entry.offset
            add(str(entry))
            if offset is not None and text is not None:
                # Show 50 bytes of HTML context on each side of the
                # entry's start and end offsets.
                add('\tHTML before offset: %r'%text[offset-50:offset])
                add('\tHTML after offset: %r'%text[offset:offset+50])
                end = offset + entry.size
                add('\tHTML before end: %r'%text[end-50:end])
                add('\tHTML after end: %r'%text[end:end+50])
            add('')
        return '\n'.join(ans)

# }}}
|
||||||
|
|
||||||
|
class CNCX(object): # {{{

    '''
    Parses the records that contain the compiled NCX (all strings from the
    NCX). Presents a simple offset : string mapping interface to access the
    data.
    '''

    def __init__(self, records, codec):
        self.records = OrderedDict()
        # Offsets are global across all CNCX records: every record
        # occupies a fixed 0x10000 byte window in the offset space.
        base = 0
        for record in records:
            raw = record.raw
            pos = 0
            while pos < len(raw):
                length, consumed = decint(raw[pos:])
                if length > 0:
                    start = pos + consumed
                    try:
                        self.records[pos+base] = raw[
                            start:start+length].decode(codec)
                    except:
                        # Undecodable entry: store a hex dump of the rest
                        # of the record and stop processing this record.
                        byts = raw[pos:]
                        r = format_bytes(byts)
                        print ('CNCX entry at offset %d has unknown format %s'%(
                            pos+base, r))
                        self.records[pos+base] = r
                        pos = len(raw)
                pos += consumed + length
            base += 0x10000

    def __getitem__(self, offset):
        # Returns None for unknown offsets rather than raising.
        return self.records.get(offset)

    def __str__(self):
        lines = ['*'*20 + ' cncx (%d strings) '%len(self.records)+ '*'*20]
        for offset, text in self.records.iteritems():
            lines.append('%10d : %s'%(offset, text))
        return '\n'.join(lines)


# }}}
|
||||||
|
|
||||||
|
class ImageRecord(object): # {{{

    '''A single image record; knows how to dump itself into a folder.'''

    def __init__(self, idx, record, fmt):
        self.idx = idx
        self.fmt = fmt
        self.raw = record.raw

    def dump(self, folder):
        # File name is the zero padded image index plus the detected
        # image format as the extension.
        name = '%06d'%self.idx
        fname = name + '.' + self.fmt
        with open(os.path.join(folder, fname), 'wb') as out:
            out.write(self.raw)

# }}}
|
||||||
|
|
||||||
|
class BinaryRecord(object): # {{{

    '''An opaque binary record. Named after its four byte signature when
    that signature is a recognized one.'''

    def __init__(self, idx, record):
        self.raw = record.raw
        sig = self.raw[:4]
        name = '%06d'%idx
        known = {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
                b'FDST', b'AUDI', b'VIDE',}
        if sig in known:
            name += '-' + sig.decode('ascii')
        elif sig == b'\xe9\x8e\r\n':
            # End-of-file marker record
            name += '-' + 'EOF'
        self.name = name

    def dump(self, folder):
        with open(os.path.join(folder, self.name+'.bin'), 'wb') as out:
            out.write(self.raw)

# }}}
|
||||||
|
|
||||||
|
class FontRecord(object): # {{{

    '''A FONT record: decodes the font container and keeps the usable
    payload (decoded font data when available, raw data otherwise).'''

    def __init__(self, idx, record):
        self.raw = record.raw
        self.font = read_font_record(self.raw)
        if self.font['err']:
            raise ValueError('Failed to read font record: %s Headers: %s'%(
                self.font['err'], self.font['headers']))
        if self.font['font_data']:
            self.payload = self.font['font_data']
        else:
            self.payload = self.font['raw_data']
        # Zero padded record index plus the extension reported by the
        # font decoder.
        self.name = '%s.%s'%('%06d'%idx, self.font['ext'])

    def dump(self, folder):
        with open(os.path.join(folder, self.name), 'wb') as out:
            out.write(self.payload)

# }}}
|
||||||
|
|
||||||
|
class TBSIndexing(object): # {{{

    '''
    Decodes and pretty-prints the TBS (trailing byte sequence) indexing
    information found in the trailing data of each text record, relating
    it to the index entries that start, end or are wholly contained in
    that record.
    '''

    def __init__(self, text_records, indices, doc_type):
        # Map of text record -> {'starts': [...], 'ends': [...],
        # 'complete': [...], 'geom': (start, end)}, where geom is the
        # inclusive byte span of the record in the concatenated text.
        self.record_indices = OrderedDict()
        self.doc_type = doc_type
        self.indices = indices
        pos = 0
        for r in text_records:
            start = pos
            pos += len(r.raw)
            end = pos - 1
            self.record_indices[r] = x = {'starts':[], 'ends':[],
                    'complete':[], 'geom': (start, end)}
            for entry in indices:
                istart, sz = entry.offset, entry.size
                iend = istart + sz - 1
                # Classify each entry by which of its end points fall
                # inside this record's byte span.
                has_start = istart >= start and istart <= end
                has_end = iend >= start and iend <= end
                rec = None
                if has_start and has_end:
                    rec = 'complete'
                elif has_start and not has_end:
                    rec = 'starts'
                elif not has_start and has_end:
                    rec = 'ends'
                if rec:
                    x[rec].append(entry)

    def get_index(self, idx):
        # Index identifiers may be stored as ints or as their string
        # form, hence the two element membership test.
        for i in self.indices:
            if i.index in {idx, unicode(idx)}: return i
        raise IndexError('Index %d not found'%idx)

    def __str__(self):
        ans = ['*'*20 + ' TBS Indexing (%d records) '%len(self.record_indices)+ '*'*20]
        for r, dat in self.record_indices.iteritems():
            # dump_record returns (tbs_type, lines); [-1] takes the lines.
            ans += self.dump_record(r, dat)[-1]
        return '\n'.join(ans)

    def dump(self, bdir):
        # Group the decoded dumps by TBS type and write one file per type.
        types = defaultdict(list)
        for r, dat in self.record_indices.iteritems():
            tbs_type, strings = self.dump_record(r, dat)
            if tbs_type == 0: continue
            types[tbs_type] += strings
        for typ, strings in types.iteritems():
            with open(os.path.join(bdir, 'tbs_type_%d.txt'%typ), 'wb') as f:
                f.write('\n'.join(strings))

    def dump_record(self, r, dat):
        '''Decode the TBS bytes of one text record.

        Returns (tbs_type, lines) where lines is a list of human readable
        strings describing the decoded data.
        '''
        ans = []
        ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx,
            dat['geom'][0], dat['geom'][1]))
        s, e, c = dat['starts'], dat['ends'], dat['complete']
        ans.append(('\tContains: %d index entries '
            '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e,
                c, s))))
        byts = bytearray(r.trailing_data.get('indexing', b''))
        ans.append('TBS bytes: %s'%format_bytes(byts))
        for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
            if entries:
                ans.append('\t%s:'%typ)
                for x in entries:
                    ans.append(('\t\tIndex Entry: %s (Parent index: %s, '
                        'Depth: %d, Offset: %d, Size: %d) [%s]')%(
                        x.index, x.parent_index, x.depth, x.offset, x.size, x.label))
        def bin4(num):
            # 4 digit binary rendering; the local 'ans' here shadows the
            # outer list deliberately (it is just the digit string).
            ans = bin(num)[2:]
            return bytes('0'*(4-len(ans)) + ans)

        def repr_extra(x):
            # NOTE(review): the parameter x is ignored; the closure always
            # renders the enclosing 'extra' dict. Callers pass extra
            # anyway, so behavior matches, but confirm this is intended.
            return str({bin4(k):v for k, v in extra.iteritems()})

        tbs_type = 0
        # Doc types 257/258/259 mark periodicals (hierarchical content).
        is_periodical = self.doc_type in (257, 258, 259)
        if len(byts):
            # First TBS field: outermost index plus flag/value map.
            outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
            byts = byts[consumed:]
            # The TBS type is the union of all flag bits seen.
            for k in extra:
                tbs_type |= k
            ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
            ans.append('Outermost index: %d'%outermost_index)
            ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
            if is_periodical: # Hierarchical periodical
                try:
                    byts, a = self.interpret_periodical(tbs_type, byts,
                            dat['geom'][0])
                except:
                    # Best-effort debugging tool: report and continue.
                    import traceback
                    traceback.print_exc()
                    a = []
                    print ('Failed to decode TBS bytes for record: %d'%r.idx)
                ans += a
            if byts:
                sbyts = tuple(hex(b)[2:] for b in byts)
                ans.append('Remaining bytes: %s'%' '.join(sbyts))

        ans.append('')
        return tbs_type, ans

    def interpret_periodical(self, tbs_type, byts, record_offset):
        '''Decode the periodical-specific portion of the TBS bytes.

        Returns (remaining_bytes, lines). record_offset is the absolute
        byte offset of the record, used to report absolute offsets.
        '''
        ans = []

        def read_section_transitions(byts, psi=None): # {{{
            # psi is the "previous section index" entry; transitions walk
            # articles/sections until the bytes are exhausted.
            if psi is None:
                # Assume previous section is 1
                psi = self.get_index(1)

            while byts:
                ai, extra, consumed = decode_tbs(byts)
                byts = byts[consumed:]
                if extra.get(0b0010, None) is not None:
                    raise ValueError('Dont know how to interpret flag 0b0010'
                            ' while reading section transitions')
                if extra.get(0b1000, None) is not None:
                    if len(extra) > 1:
                        raise ValueError('Dont know how to interpret flags'
                                ' %r while reading section transitions'%extra)
                    # Flag 0b1000: move on to the next section.
                    nsi = self.get_index(psi.index+1)
                    ans.append('Last article in this record of section %d'
                            ' (relative to next section index [%d]): '
                            '%d [%d absolute index]'%(psi.index, nsi.index, ai,
                                ai+nsi.index))
                    psi = nsi
                    continue

                ans.append('First article in this record of section %d'
                        ' (relative to its parent section): '
                        '%d [%d absolute index]'%(psi.index, ai, ai+psi.index))

                # Flag 0b0100 carries the article count for this section.
                num = extra.get(0b0100, None)
                if num is None:
                    msg = ('The section %d has at most one article'
                            ' in this record')%psi.index
                else:
                    msg = ('Number of articles in this record of '
                            'section %d: %d')%(psi.index, num)
                ans.append(msg)

                # Flag 0b0001: offset to the next section (0 => the
                # article spans this whole record).
                offset = extra.get(0b0001, None)
                if offset is not None:
                    if offset == 0:
                        ans.append('This record is spanned by the article:'
                                '%d'%(ai+psi.index))
                    else:
                        ans.append('->Offset to start of next section (%d) from start'
                                ' of record: %d [%d absolute offset]'%(psi.index+1,
                                    offset, offset+record_offset))
            return byts
        # }}}

        def read_starting_section(byts): # {{{
            # Decode which section is open at the start of this record.
            orig = byts
            si, extra, consumed = decode_tbs(byts)
            byts = byts[consumed:]
            if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra:
                raise ValueError('Dont know how to interpret flags %r'
                        ' when reading starting section'%extra)
            si = self.get_index(si)
            ans.append('The section at the start of this record is:'
                    ' %s'%si.index)
            if 0b0100 in extra:
                num = extra[0b0100]
                ans.append('The number of articles from the section %d'
                        ' in this record: %s'%(si.index, num))
            elif 0b0001 in extra:
                eof = extra[0b0001]
                if eof != 0:
                    raise ValueError('Unknown eof value %s when reading'
                            ' starting section. All bytes: %r'%(eof, orig))
                ans.append('??This record has more than one article from '
                        ' the section: %s'%si.index)
            return si, byts
        # }}}

        if tbs_type & 0b0100:
            # Starting section is the first section
            ssi = self.get_index(1)
        else:
            ssi, byts = read_starting_section(byts)

        byts = read_section_transitions(byts, ssi)

        return byts, ans

# }}}
|
||||||
|
|
||||||
|
class MOBIFile(object): # {{{

    '''
    Partition all the records of a MOBI6 file into typed collections
    (text, index, CNCX, image, font, binary records) for debugging dumps.
    '''

    def __init__(self, mf):
        # Mirror the relevant attributes of the reader object mf.
        for x in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
                'huffman_record_nums',):
            setattr(self, x, getattr(mf, x))

        self.index_header = self.index_record = None
        self.indexing_record_nums = set()
        pir = self.mobi_header.primary_index_record
        if pir != NULL_INDEX:
            # Primary index: header record at pir, index records after it,
            # followed by the CNCX records.
            self.index_header = IndexHeader(self.records[pir])
            numi = self.index_header.index_count
            self.cncx = CNCX(self.records[
                pir+1+numi:pir+1+numi+self.index_header.num_of_cncx_blocks],
                self.index_header.index_encoding)
            self.index_record = IndexRecord(self.records[pir+1:pir+1+numi],
                    self.index_header, self.cncx)
            self.indexing_record_nums = set(xrange(pir,
                pir+1+numi+self.index_header.num_of_cncx_blocks))
        self.secondary_index_record = self.secondary_index_header = None
        sir = self.mobi_header.secondary_index_record
        if sir != NULL_INDEX:
            # NOTE(review): self.cncx is only assigned in the primary index
            # branch above; a file with a secondary index but no primary
            # index would raise AttributeError here — confirm whether that
            # combination can occur.
            self.secondary_index_header = SecondaryIndexHeader(self.records[sir])
            numi = self.secondary_index_header.index_count
            self.indexing_record_nums.add(sir)
            self.secondary_index_record = IndexRecord(
                    self.records[sir+1:sir+1+numi], self.secondary_index_header, self.cncx)
            self.indexing_record_nums |= set(xrange(sir+1, sir+1+numi))


        ntr = self.mobi_header.number_of_text_records
        fntbr = self.mobi_header.first_non_book_record
        fii = self.mobi_header.first_image_index
        if fntbr == NULL_INDEX:
            # No non-book records declared; treat everything as book data.
            fntbr = len(self.records)
        # Text records start at record 1 (record 0 is the MOBI header).
        self.text_records = [TextRecord(r, self.records[r],
            self.mobi_header.extra_data_flags, mf.decompress6) for r in xrange(1,
            min(len(self.records), ntr+1))]
        self.image_records, self.binary_records = [], []
        self.font_records = []
        image_index = 0
        for i in xrange(fntbr, len(self.records)):
            # Skip records already accounted for as index/CNCX/huffman.
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
                continue
            image_index += 1
            r = self.records[i]
            fmt = None
            # Only try image sniffing past the first image index and for
            # records that do not carry a known non-image signature.
            if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
                    b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
                    b'AUDI', b'VIDE', b'FONT'}:
                try:
                    width, height, fmt = identify_data(r.raw)
                except:
                    # Not a recognizable image; fall through to binary.
                    pass
            if fmt is not None:
                self.image_records.append(ImageRecord(image_index, r, fmt))
            elif r.raw[:4] == b'FONT':
                self.font_records.append(FontRecord(i, r))
            else:
                self.binary_records.append(BinaryRecord(i, r))

        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(self.text_records,
                    self.index_record.indices, self.mobi_header.type_raw)

    def print_header(self, f=sys.stdout):
        # Write the palm database header, all record headers and the MOBI
        # header to f (a binary stream, hence the utf-8 encode).
        print (str(self.palmdb).encode('utf-8'), file=f)
        print (file=f)
        print ('Record headers:', file=f)
        for i, r in enumerate(self.records):
            print ('%6d. %s'%(i, r.header), file=f)

        print (file=f)
        print (str(self.mobi_header).encode('utf-8'), file=f)
# }}}
|
||||||
|
|
||||||
|
def inspect_mobi(mobi_file, ddir):
    '''Dump the contents of the MOBI6 file *mobi_file* into the directory
    *ddir*: headers, raw and pretty-printed HTML, index/TBS information
    and the individual records grouped by category.'''
    f = MOBIFile(mobi_file)
    with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
        f.print_header(f=out)

    # NOTE(review): alltext is first the output path, then rebound to the
    # accumulated bytes inside the with block; the final of.seek(0)
    # appears to be a leftover with no effect on the written file.
    alltext = os.path.join(ddir, 'text.html')
    with open(alltext, 'wb') as of:
        alltext = b''
        for rec in f.text_records:
            of.write(rec.raw)
            alltext += rec.raw
        of.seek(0)

    # Pretty-print the concatenated HTML via lxml.
    root = html.fromstring(alltext.decode('utf-8'))
    with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
        of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
            include_meta_content_type=True))

    if f.index_header is not None:
        # Give the index record access to the text so its dump can show
        # HTML context around entry offsets.
        f.index_record.alltext = alltext
        with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
            print(str(f.index_header), file=out)
            print('\n\n', file=out)
            if f.secondary_index_header is not None:
                print(str(f.secondary_index_header).encode('utf-8'), file=out)
                print('\n\n', file=out)
            if f.secondary_index_record is not None:
                print(str(f.secondary_index_record).encode('utf-8'), file=out)
                print('\n\n', file=out)
            print(str(f.cncx).encode('utf-8'), file=out)
            print('\n\n', file=out)
            print(str(f.index_record), file=out)
        with open(os.path.join(ddir, 'tbs_indexing.txt'), 'wb') as out:
            print(str(f.tbs_indexing), file=out)
        f.tbs_indexing.dump(ddir)

    # One sub-directory per record category; each record dumps itself.
    for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
            ('binary', 'binary_records'), ('font', 'font_records')]:
        tdir = os.path.join(ddir, tdir)
        os.mkdir(tdir)
        for rec in getattr(f, attr):
            rec.dump(tdir)



# }}}
|
||||||
|
|
||||||
|
|
62
src/calibre/ebooks/mobi/debug/mobi8.py
Normal file
62
src/calibre/ebooks/mobi/debug/mobi8.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
|
|
||||||
|
class MOBIFile(object):

    '''Groups the records of a KF8 file into text and resource sections,
    driven by the calibre MOBI reader object *mf*.'''

    def __init__(self, mf):
        self.mf = mf
        header, header8 = mf.mobi_header, mf.mobi8_header
        first_text_record = 1
        offset = 0
        res_end = len(mf.records)
        if mf.kf8_type == 'joint':
            # In a joint MOBI6+KF8 file the KF8 records start at the index
            # supplied via EXTH; KF8 resources end just before it.
            offset = header.exth.kf8_header_index
            res_end = offset - 1

        self.resource_records = mf.records[header.first_non_book_record:res_end]
        text_start = first_text_record + offset
        text_end = text_start + header8.number_of_text_records
        self.text_records = [TextRecord(i, rec, header8.extra_data_flags,
            mf.decompress8) for i, rec in
            enumerate(mf.records[text_start:text_end])]

        self.raw_text = b''.join(rec.raw for rec in self.text_records)

    def print_header(self, f=sys.stdout):
        '''Write the palm database header, the per record headers and the
        KF8 header to *f* (a binary stream).'''
        print (str(self.mf.palmdb).encode('utf-8'), file=f)
        print (file=f)
        print ('Record headers:', file=f)
        for i, rec in enumerate(self.mf.records):
            print ('%6d. %s'%(i, rec.header), file=f)

        print (file=f)
        print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
|
||||||
|
|
||||||
|
|
||||||
|
def inspect_mobi(mobi_file, ddir):
    '''Dump the contents of the KF8 file *mobi_file* into the directory
    *ddir*: headers, the raw text and the per-category record dumps.'''
    f = MOBIFile(mobi_file)
    with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
        f.print_header(f=out)

    with open(os.path.join(ddir, 'raw_text.html'), 'wb') as of:
        of.write(f.raw_text)

    for tdir, attr in [('text_records', 'text_records'), ('images',
        'image_records'), ('binary', 'binary_records'), ('font',
        'font_records')]:
        tdir = os.path.join(ddir, tdir)
        os.mkdir(tdir)
        # Not every category exists on the KF8 MOBIFile, hence the
        # empty-list default.
        for rec in getattr(f, attr, []):
            rec.dump(tdir)
|
||||||
|
|
||||||
|
|
@ -186,20 +186,16 @@ class BookHeader(object):
|
|||||||
if len(raw) >= 0xF8:
|
if len(raw) >= 0xF8:
|
||||||
self.ncxidx, = struct.unpack_from(b'>L', raw, 0xF4)
|
self.ncxidx, = struct.unpack_from(b'>L', raw, 0xF4)
|
||||||
|
|
||||||
if self.mobi_version >= 8:
|
# Ancient PRC files from Baen can have random values for
|
||||||
self.skelidx, = struct.unpack_from('>L', raw, 0xFC)
|
# mobi_version, so be conservative
|
||||||
|
if self.mobi_version == 8 and len(raw) >= (0xF8 + 16):
|
||||||
# Index into <div> sections in raw_ml
|
self.dividx, self.skelidx, self.datpidx, self.othidx = \
|
||||||
self.dividx, = struct.unpack_from('>L', raw, 0xF8)
|
struct.unpack_from(b'>4L', raw, 0xF8)
|
||||||
|
|
||||||
# Index into Other files
|
|
||||||
self.othidx, = struct.unpack_from('>L', raw, 0x104)
|
|
||||||
|
|
||||||
# need to use the FDST record to find out how to properly
|
# need to use the FDST record to find out how to properly
|
||||||
# unpack the raw_ml into pieces it is simply a table of start
|
# unpack the raw_ml into pieces it is simply a table of start
|
||||||
# and end locations for each flow piece
|
# and end locations for each flow piece
|
||||||
self.fdstidx, = struct.unpack_from('>L', raw, 0xC0)
|
self.fdstidx, self.fdstcnt = struct.unpack_from(b'>2L', raw, 0xC0)
|
||||||
self.fdstcnt, = struct.unpack_from('>L', raw, 0xC4)
|
|
||||||
# if cnt is 1 or less, fdst section number can be garbage
|
# if cnt is 1 or less, fdst section number can be garbage
|
||||||
if self.fdstcnt <= 1:
|
if self.fdstcnt <= 1:
|
||||||
self.fdstidx = NULL_INDEX
|
self.fdstidx = NULL_INDEX
|
||||||
|
@ -8,9 +8,13 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct
|
import struct
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict, namedtuple
|
||||||
|
|
||||||
from calibre.ebooks.mobi.utils import decint, count_set_bits
|
from calibre.ebooks.mobi.utils import (decint, count_set_bits,
|
||||||
|
decode_string)
|
||||||
|
|
||||||
|
TagX = namedtuple('TagX', 'tag num_of_values bitmask eof')
|
||||||
|
PTagX = namedtuple('PTagX', 'tag value_count value_bytes num_of_values')
|
||||||
|
|
||||||
class InvalidFile(ValueError):
|
class InvalidFile(ValueError):
|
||||||
pass
|
pass
|
||||||
@ -37,9 +41,8 @@ def parse_indx_header(data):
|
|||||||
'lng', 'total', 'ordt', 'ligt', 'nligt', 'ncncx'
|
'lng', 'total', 'ordt', 'ligt', 'nligt', 'ncncx'
|
||||||
)
|
)
|
||||||
num = len(words)
|
num = len(words)
|
||||||
values = struct.unpack(b'>%dL' % num, data[4:4*(num+1)])
|
values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)])
|
||||||
header = {words[i]:values[i] for i in xrange(num)}
|
return dict(zip(words, values))
|
||||||
return header
|
|
||||||
|
|
||||||
class CNCX(object): # {{{
|
class CNCX(object): # {{{
|
||||||
|
|
||||||
@ -77,81 +80,116 @@ class CNCX(object): # {{{
|
|||||||
return self.records.get(offset, default)
|
return self.records.get(offset, default)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def parse_tag_section(data):
|
def parse_tagx_section(data):
|
||||||
check_signature(data, b'TAGX')
|
check_signature(data, b'TAGX')
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
first_entry_offset, = struct.unpack_from(b'>L', data, 0x04)
|
first_entry_offset, = struct.unpack_from(b'>L', data, 4)
|
||||||
control_byte_count, = struct.unpack_from(b'>L', data, 0x08)
|
control_byte_count, = struct.unpack_from(b'>L', data, 8)
|
||||||
|
|
||||||
# Skip the first 12 bytes already read above.
|
|
||||||
for i in xrange(12, first_entry_offset, 4):
|
for i in xrange(12, first_entry_offset, 4):
|
||||||
pos = i
|
vals = list(bytearray(data[i:i+4]))
|
||||||
tags.append((ord(data[pos]), ord(data[pos+1]), ord(data[pos+2]),
|
tags.append(TagX(*vals))
|
||||||
ord(data[pos+3])))
|
|
||||||
return control_byte_count, tags
|
return control_byte_count, tags
|
||||||
|
|
||||||
def get_tag_map(control_byte_count, tags, data, start, end):
|
def get_tag_map(control_byte_count, tagx, data, strict=False):
|
||||||
ptags = []
|
ptags = []
|
||||||
ans = {}
|
ans = {}
|
||||||
control_byte_index = 0
|
control_bytes = list(bytearray(data[:control_byte_count]))
|
||||||
data_start = start + control_byte_count
|
data = data[control_byte_count:]
|
||||||
|
|
||||||
for tag, values_per_entry, mask, end_flag in tags:
|
for x in tagx:
|
||||||
if end_flag == 0x01:
|
if x.eof == 0x01:
|
||||||
control_byte_index += 1
|
control_bytes = control_bytes[1:]
|
||||||
continue
|
continue
|
||||||
value = ord(data[start + control_byte_index]) & mask
|
value = control_bytes[0] & x.bitmask
|
||||||
if value != 0:
|
if value != 0:
|
||||||
if value == mask:
|
value_count = value_bytes = None
|
||||||
if count_set_bits(mask) > 1:
|
if value == x.bitmask:
|
||||||
|
if count_set_bits(x.bitmask) > 1:
|
||||||
# If all bits of masked value are set and the mask has more
|
# If all bits of masked value are set and the mask has more
|
||||||
# than one bit, a variable width value will follow after
|
# than one bit, a variable width value will follow after
|
||||||
# the control bytes which defines the length of bytes (NOT
|
# the control bytes which defines the length of bytes (NOT
|
||||||
# the value count!) which will contain the corresponding
|
# the value count!) which will contain the corresponding
|
||||||
# variable width values.
|
# variable width values.
|
||||||
value, consumed = decint(data[data_start:])
|
value_bytes, consumed = decint(data)
|
||||||
data_start += consumed
|
data = data[consumed:]
|
||||||
ptags.append((tag, None, value, values_per_entry))
|
|
||||||
else:
|
else:
|
||||||
ptags.append((tag, 1, None, values_per_entry))
|
value_count = 1
|
||||||
else:
|
else:
|
||||||
# Shift bits to get the masked value.
|
# Shift bits to get the masked value.
|
||||||
while mask & 0x01 == 0:
|
mask = x.bitmask
|
||||||
mask = mask >> 1
|
while mask & 0b1 == 0:
|
||||||
value = value >> 1
|
mask >>= 1
|
||||||
ptags.append((tag, value, None, values_per_entry))
|
value >>= 1
|
||||||
for tag, value_count, value_bytes, values_per_entry in ptags:
|
value_count = value
|
||||||
|
ptags.append(PTagX(x.tag, value_count, value_bytes,
|
||||||
|
x.num_of_values))
|
||||||
|
|
||||||
|
for x in ptags:
|
||||||
values = []
|
values = []
|
||||||
if value_count != None:
|
if x.value_count is not None:
|
||||||
# Read value_count * values_per_entry variable width values.
|
# Read value_count * values_per_entry variable width values.
|
||||||
for _ in xrange(value_count*values_per_entry):
|
for _ in xrange(x.value_count * x.num_of_values):
|
||||||
byts, consumed = decint(data[data_start:])
|
byts, consumed = decint(data)
|
||||||
data_start += consumed
|
data = data[consumed:]
|
||||||
values.append(byts)
|
values.append(byts)
|
||||||
else:
|
else: # value_bytes is not None
|
||||||
# Convert value_bytes to variable width values.
|
# Convert value_bytes to variable width values.
|
||||||
total_consumed = 0
|
total_consumed = 0
|
||||||
while total_consumed < value_bytes:
|
while total_consumed < x.value_bytes:
|
||||||
# Does this work for values_per_entry != 1?
|
# Does this work for values_per_entry != 1?
|
||||||
byts, consumed = decint(data[data_start:])
|
byts, consumed = decint(data)
|
||||||
data_start += consumed
|
data = data[consumed:]
|
||||||
total_consumed += consumed
|
total_consumed += consumed
|
||||||
values.append(byts)
|
values.append(byts)
|
||||||
if total_consumed != value_bytes:
|
if total_consumed != x.value_bytes:
|
||||||
print ("Error: Should consume %s bytes, but consumed %s" %
|
err = ("Error: Should consume %s bytes, but consumed %s" %
|
||||||
(value_bytes, total_consumed))
|
(x.value_bytes, total_consumed))
|
||||||
ans[tag] = values
|
if strict:
|
||||||
# Test that all bytes have been processed if end is given.
|
raise ValueError(err)
|
||||||
if end is not None and data_start < end:
|
else:
|
||||||
# The last entry might have some zero padding bytes, so complain only if non zero bytes are left.
|
print(err)
|
||||||
rest = data[data_start:end]
|
ans[x.tag] = values
|
||||||
if rest.replace(b'\0', b''):
|
# Test that all bytes have been processed
|
||||||
print ("Warning: There are unprocessed index bytes left: %s" %
|
if data.replace(b'\0', b''):
|
||||||
format_bytes(rest))
|
err = ("Warning: There are unprocessed index bytes left: %s" %
|
||||||
|
format_bytes(data))
|
||||||
|
if strict:
|
||||||
|
raise ValueError(err)
|
||||||
|
else:
|
||||||
|
print(err)
|
||||||
|
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def parse_index_record(table, data, control_byte_count, tags, codec,
|
||||||
|
strict=False):
|
||||||
|
header = parse_indx_header(data)
|
||||||
|
idxt_pos = header['start']
|
||||||
|
if data[idxt_pos:idxt_pos+4] != b'IDXT':
|
||||||
|
print ('WARNING: Invalid INDX record')
|
||||||
|
entry_count = header['count']
|
||||||
|
|
||||||
|
# loop through to build up the IDXT position starts
|
||||||
|
idx_positions= []
|
||||||
|
for j in xrange(entry_count):
|
||||||
|
pos, = struct.unpack_from(b'>H', data, idxt_pos + 4 + (2 * j))
|
||||||
|
idx_positions.append(pos)
|
||||||
|
# The last entry ends before the IDXT tag (but there might be zero fill
|
||||||
|
# bytes we need to ignore!)
|
||||||
|
idx_positions.append(idxt_pos)
|
||||||
|
|
||||||
|
# For each entry in the IDXT build up the tag map and any associated
|
||||||
|
# text
|
||||||
|
for j in xrange(entry_count):
|
||||||
|
start, end = idx_positions[j:j+2]
|
||||||
|
rec = data[start:end]
|
||||||
|
ident, consumed = decode_string(rec, codec=codec)
|
||||||
|
rec = rec[consumed:]
|
||||||
|
tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
|
||||||
|
table[ident] = tag_map
|
||||||
|
|
||||||
|
|
||||||
def read_index(sections, idx, codec):
|
def read_index(sections, idx, codec):
|
||||||
table, cncx = OrderedDict(), CNCX([], codec)
|
table, cncx = OrderedDict(), CNCX([], codec)
|
||||||
|
|
||||||
@ -166,32 +204,11 @@ def read_index(sections, idx, codec):
|
|||||||
cncx = CNCX(cncx_records, codec)
|
cncx = CNCX(cncx_records, codec)
|
||||||
|
|
||||||
tag_section_start = indx_header['len']
|
tag_section_start = indx_header['len']
|
||||||
control_byte_count, tags = parse_tag_section(data[tag_section_start:])
|
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
|
||||||
|
|
||||||
for i in xrange(idx + 1, idx + 1 + indx_count):
|
for i in xrange(idx + 1, idx + 1 + indx_count):
|
||||||
|
# Index record
|
||||||
data = sections[i][0]
|
data = sections[i][0]
|
||||||
header = parse_indx_header(data)
|
parse_index_record(table, data, control_byte_count, tags, codec)
|
||||||
idxt_pos = header['start']
|
|
||||||
entry_count = header['count']
|
|
||||||
|
|
||||||
# loop through to build up the IDXT position starts
|
|
||||||
idx_positions= []
|
|
||||||
for j in xrange(entry_count):
|
|
||||||
pos, = struct.unpack_from(b'>H', data, idxt_pos + 4 + (2 * j))
|
|
||||||
idx_positions.append(pos)
|
|
||||||
# The last entry ends before the IDXT tag (but there might be zero fill
|
|
||||||
# bytes we need to ignore!)
|
|
||||||
idx_positions.append(idxt_pos)
|
|
||||||
|
|
||||||
# For each entry in the IDXT build up the tag map and any associated
|
|
||||||
# text
|
|
||||||
for j in xrange(entry_count):
|
|
||||||
start, end = idx_positions[j:j+2]
|
|
||||||
text_length = ord(data[start])
|
|
||||||
text = data[start+1:start+1+text_length]
|
|
||||||
tag_map = get_tag_map(control_byte_count, tags, data,
|
|
||||||
start+1+text_length, end)
|
|
||||||
table[text] = tag_map
|
|
||||||
|
|
||||||
return table, cncx
|
return table, cncx
|
||||||
|
|
||||||
|
@ -33,9 +33,11 @@ def update_internal_links(mobi8_reader):
|
|||||||
for m in posfid_index_pattern.finditer(tag):
|
for m in posfid_index_pattern.finditer(tag):
|
||||||
posfid = m.group(1)
|
posfid = m.group(1)
|
||||||
offset = m.group(2)
|
offset = m.group(2)
|
||||||
filename, idtag = mr.get_id_tag_by_pos_fid(posfid, offset)
|
filename, idtag = mr.get_id_tag_by_pos_fid(int(posfid, 32),
|
||||||
|
int(offset, 32))
|
||||||
suffix = (b'#' + idtag) if idtag else b''
|
suffix = (b'#' + idtag) if idtag else b''
|
||||||
replacement = filename.encode(mr.header.codec) + suffix
|
replacement = filename.split('/')[-1].encode(
|
||||||
|
mr.header.codec) + suffix
|
||||||
tag = posfid_index_pattern.sub(replacement, tag, 1)
|
tag = posfid_index_pattern.sub(replacement, tag, 1)
|
||||||
srcpieces[j] = tag
|
srcpieces[j] = tag
|
||||||
part = ''.join([x.decode(mr.header.codec) for x in srcpieces])
|
part = ''.join([x.decode(mr.header.codec) for x in srcpieces])
|
||||||
|
@ -107,7 +107,10 @@ class MobiReader(object):
|
|||||||
self.kf8_type = None
|
self.kf8_type = None
|
||||||
k8i = getattr(self.book_header.exth, 'kf8_header', None)
|
k8i = getattr(self.book_header.exth, 'kf8_header', None)
|
||||||
|
|
||||||
if self.book_header.mobi_version == 8:
|
# Ancient PRC files from Baen can have random values for
|
||||||
|
# mobi_version, so be conservative
|
||||||
|
if (self.book_header.mobi_version == 8 and hasattr(self.book_header,
|
||||||
|
'skelidx')):
|
||||||
self.kf8_type = 'standalone'
|
self.kf8_type = 'standalone'
|
||||||
elif k8i is not None: # Check for joint mobi 6 and kf 8 file
|
elif k8i is not None: # Check for joint mobi 6 and kf 8 file
|
||||||
try:
|
try:
|
||||||
@ -118,12 +121,17 @@ class MobiReader(object):
|
|||||||
try:
|
try:
|
||||||
self.book_header = BookHeader(self.sections[k8i][0],
|
self.book_header = BookHeader(self.sections[k8i][0],
|
||||||
self.ident, user_encoding, self.log)
|
self.ident, user_encoding, self.log)
|
||||||
# The following are only correct in the Mobi 6
|
|
||||||
# header not the Mobi 8 header
|
# Only the first_image_index from the MOBI 6 header is
|
||||||
|
# useful
|
||||||
for x in ('first_image_index',):
|
for x in ('first_image_index',):
|
||||||
setattr(self.book_header, x, getattr(bh, x))
|
setattr(self.book_header, x, getattr(bh, x))
|
||||||
|
|
||||||
|
# We need to do this because the MOBI 6 text extract code
|
||||||
|
# does not know anything about the kf8 offset
|
||||||
if hasattr(self.book_header, 'huff_offset'):
|
if hasattr(self.book_header, 'huff_offset'):
|
||||||
self.book_header.huff_offset += k8i
|
self.book_header.huff_offset += k8i
|
||||||
|
|
||||||
self.kf8_type = 'joint'
|
self.kf8_type = 'joint'
|
||||||
self.kf8_boundary = k8i-1
|
self.kf8_boundary = k8i-1
|
||||||
except:
|
except:
|
||||||
@ -151,6 +159,7 @@ class MobiReader(object):
|
|||||||
self.processed_html = self.processed_html.replace('</</', '</')
|
self.processed_html = self.processed_html.replace('</</', '</')
|
||||||
self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><',
|
self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><',
|
||||||
self.processed_html)
|
self.processed_html)
|
||||||
|
self.processed_html = self.processed_html.replace(u'\ufeff', '')
|
||||||
# Remove tags of the form <xyz: ...> as they can cause issues further
|
# Remove tags of the form <xyz: ...> as they can cause issues further
|
||||||
# along the pipeline
|
# along the pipeline
|
||||||
self.processed_html = re.sub(r'</{0,1}[a-zA-Z]+:\s+[^>]*>', '',
|
self.processed_html = re.sub(r'</{0,1}[a-zA-Z]+:\s+[^>]*>', '',
|
||||||
|
@ -33,6 +33,7 @@ class Mobi8Reader(object):
|
|||||||
def __init__(self, mobi6_reader, log):
|
def __init__(self, mobi6_reader, log):
|
||||||
self.mobi6_reader, self.log = mobi6_reader, log
|
self.mobi6_reader, self.log = mobi6_reader, log
|
||||||
self.header = mobi6_reader.book_header
|
self.header = mobi6_reader.book_header
|
||||||
|
self.encrypted_fonts = []
|
||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
self.mobi6_reader.check_for_drm()
|
self.mobi6_reader.check_for_drm()
|
||||||
@ -229,11 +230,9 @@ class Mobi8Reader(object):
|
|||||||
|
|
||||||
def get_id_tag_by_pos_fid(self, posfid, offset):
|
def get_id_tag_by_pos_fid(self, posfid, offset):
|
||||||
# first convert kindle:pos:fid and offset info to position in file
|
# first convert kindle:pos:fid and offset info to position in file
|
||||||
row = int(posfid, 32)
|
insertpos, idtext, filenum, seqnm, startpos, length = self.elems[posfid]
|
||||||
off = int(offset, 32)
|
pos = insertpos + offset
|
||||||
[insertpos, idtext, filenum, seqnm, startpos, length] = self.elems[row]
|
fi = self.get_file_info(pos)
|
||||||
pos = insertpos + off
|
|
||||||
fname = self.get_file_info(pos).filename
|
|
||||||
# an existing "id=" must exist in original xhtml otherwise it would not
|
# an existing "id=" must exist in original xhtml otherwise it would not
|
||||||
# have worked for linking. Amazon seems to have added its own
|
# have worked for linking. Amazon seems to have added its own
|
||||||
# additional "aid=" inside tags whose contents seem to represent some
|
# additional "aid=" inside tags whose contents seem to represent some
|
||||||
@ -242,7 +241,7 @@ class Mobi8Reader(object):
|
|||||||
# so find the closest "id=" before position the file by actually
|
# so find the closest "id=" before position the file by actually
|
||||||
# searching in that file
|
# searching in that file
|
||||||
idtext = self.get_id_tag(pos)
|
idtext = self.get_id_tag(pos)
|
||||||
return fname, idtext
|
return '%s/%s'%(fi.type, fi.filename), idtext
|
||||||
|
|
||||||
def get_id_tag(self, pos):
|
def get_id_tag(self, pos):
|
||||||
# find the correct tag by actually searching in the destination
|
# find the correct tag by actually searching in the destination
|
||||||
@ -253,12 +252,13 @@ class Mobi8Reader(object):
|
|||||||
textblock = self.parts[fi.num]
|
textblock = self.parts[fi.num]
|
||||||
id_map = []
|
id_map = []
|
||||||
npos = pos - fi.start
|
npos = pos - fi.start
|
||||||
# if npos inside a tag then search all text before the its end of tag
|
|
||||||
# marker
|
|
||||||
pgt = textblock.find(b'>', npos)
|
pgt = textblock.find(b'>', npos)
|
||||||
plt = textblock.find(b'<', npos)
|
plt = textblock.find(b'<', npos)
|
||||||
if pgt < plt:
|
# if npos inside a tag then search all text before the its end of tag marker
|
||||||
|
# else not in a tag need to search the preceding tag
|
||||||
|
if plt == npos or pgt < plt:
|
||||||
npos = pgt + 1
|
npos = pgt + 1
|
||||||
|
textblock = textblock[0:npos]
|
||||||
# find id links only inside of tags
|
# find id links only inside of tags
|
||||||
# inside any < > pair find all "id=' and return whatever is inside
|
# inside any < > pair find all "id=' and return whatever is inside
|
||||||
# the quotes
|
# the quotes
|
||||||
@ -315,12 +315,18 @@ class Mobi8Reader(object):
|
|||||||
|
|
||||||
# Add href and anchor info to the index entries
|
# Add href and anchor info to the index entries
|
||||||
for entry in index_entries:
|
for entry in index_entries:
|
||||||
pos = entry['pos']
|
pos_fid = entry['pos_fid']
|
||||||
fi = self.get_file_info(pos)
|
if pos_fid is None:
|
||||||
if fi.filename is None:
|
pos = entry['pos']
|
||||||
raise ValueError('Index entry has invalid pos: %d'%pos)
|
fi = self.get_file_info(pos)
|
||||||
idtag = self.get_id_tag(pos).decode(self.header.codec)
|
if fi.filename is None:
|
||||||
entry['href'] = '%s/%s'%(fi.type, fi.filename)
|
raise ValueError('Index entry has invalid pos: %d'%pos)
|
||||||
|
idtag = self.get_id_tag(pos).decode(self.header.codec)
|
||||||
|
href = '%s/%s'%(fi.type, fi.filename)
|
||||||
|
else:
|
||||||
|
href, idtag = self.get_id_tag_by_pos_fid(*pos_fid)
|
||||||
|
|
||||||
|
entry['href'] = href
|
||||||
entry['idtag'] = idtag
|
entry['idtag'] = idtag
|
||||||
|
|
||||||
# Build the TOC object
|
# Build the TOC object
|
||||||
@ -350,6 +356,8 @@ class Mobi8Reader(object):
|
|||||||
with open(href.replace('/', os.sep), 'wb') as f:
|
with open(href.replace('/', os.sep), 'wb') as f:
|
||||||
f.write(font['font_data'] if font['font_data'] else
|
f.write(font['font_data'] if font['font_data'] else
|
||||||
font['raw_data'])
|
font['raw_data'])
|
||||||
|
if font['encrypted']:
|
||||||
|
self.encrypted_fonts.append(href)
|
||||||
else:
|
else:
|
||||||
imgtype = imghdr.what(None, data)
|
imgtype = imghdr.what(None, data)
|
||||||
if imgtype is None:
|
if imgtype is None:
|
||||||
|
@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.mobi.utils import to_base
|
|
||||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
from calibre.ebooks.mobi.reader.index import read_index
|
from calibre.ebooks.mobi.reader.index import read_index
|
||||||
|
|
||||||
@ -23,7 +22,30 @@ tag_fieldname_map = {
|
|||||||
6: ['pos_fid',0],
|
6: ['pos_fid',0],
|
||||||
21: ['parent',0],
|
21: ['parent',0],
|
||||||
22: ['child1',0],
|
22: ['child1',0],
|
||||||
23: ['childn',0]
|
23: ['childn',0],
|
||||||
|
69: ['image_index',0],
|
||||||
|
70 : ['desc_offset', 0], # 'Description offset in cncx'
|
||||||
|
71 : ['author_offset', 0], # 'Author offset in cncx'
|
||||||
|
72 : ['image_caption_offset', 0], # 'Image caption offset in cncx',
|
||||||
|
73 : ['image_attr_offset', 0], # 'Image attribution offset in cncx',
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
default_entry = {
|
||||||
|
'pos': -1,
|
||||||
|
'len': 0,
|
||||||
|
'noffs': -1,
|
||||||
|
'text' : "Unknown Text",
|
||||||
|
'hlvl' : -1,
|
||||||
|
'kind' : "Unknown Class",
|
||||||
|
'pos_fid' : None,
|
||||||
|
'parent' : -1,
|
||||||
|
'child1' : -1,
|
||||||
|
'childn' : -1,
|
||||||
|
'description': None,
|
||||||
|
'author': None,
|
||||||
|
'image_caption': None,
|
||||||
|
'image_attribution': None,
|
||||||
}
|
}
|
||||||
|
|
||||||
def read_ncx(sections, index, codec):
|
def read_ncx(sections, index, codec):
|
||||||
@ -34,32 +56,25 @@ def read_ncx(sections, index, codec):
|
|||||||
|
|
||||||
for num, x in enumerate(table.iteritems()):
|
for num, x in enumerate(table.iteritems()):
|
||||||
text, tag_map = x
|
text, tag_map = x
|
||||||
entry = {
|
entry = default_entry.copy()
|
||||||
'name': text,
|
entry['name'] = text
|
||||||
'pos': -1,
|
entry['num'] = num
|
||||||
'len': 0,
|
|
||||||
'noffs': -1,
|
|
||||||
'text' : "Unknown Text",
|
|
||||||
'hlvl' : -1,
|
|
||||||
'kind' : "Unknown Kind",
|
|
||||||
'pos_fid' : None,
|
|
||||||
'parent' : -1,
|
|
||||||
'child1' : -1,
|
|
||||||
'childn' : -1,
|
|
||||||
'num' : num
|
|
||||||
}
|
|
||||||
|
|
||||||
for tag in tag_fieldname_map.keys():
|
for tag in tag_fieldname_map.iterkeys():
|
||||||
fieldname, i = tag_fieldname_map[tag]
|
fieldname, i = tag_fieldname_map[tag]
|
||||||
if tag in tag_map:
|
if tag in tag_map:
|
||||||
fieldvalue = tag_map[tag][i]
|
fieldvalue = tag_map[tag][i]
|
||||||
if tag == 6:
|
if tag == 6:
|
||||||
fieldvalue = to_base(fieldvalue, base=32)
|
# Appears to be an idx into the KF8 elems table with an
|
||||||
|
# offset
|
||||||
|
fieldvalue = tuple(tag_map[tag])
|
||||||
entry[fieldname] = fieldvalue
|
entry[fieldname] = fieldvalue
|
||||||
if tag == 3:
|
for which, name in {3:'text', 5:'kind', 70:'description',
|
||||||
entry['text'] = cncx.get(fieldvalue, 'Unknown Text')
|
71:'author', 72:'image_caption',
|
||||||
if tag == 5:
|
73:'image_attribution'}.iteritems():
|
||||||
entry['kind'] = cncx.get(fieldvalue, 'Unknown Kind')
|
if tag == which:
|
||||||
|
entry[name] = cncx.get(fieldvalue,
|
||||||
|
default_entry[name])
|
||||||
index_entries.append(entry)
|
index_entries.append(entry)
|
||||||
|
|
||||||
return index_entries
|
return index_entries
|
||||||
|
@ -15,7 +15,13 @@ from calibre.ebooks import normalize
|
|||||||
|
|
||||||
IMAGE_MAX_SIZE = 10 * 1024 * 1024
|
IMAGE_MAX_SIZE = 10 * 1024 * 1024
|
||||||
|
|
||||||
def decode_hex_number(raw):
|
def decode_string(raw, codec='utf-8'):
|
||||||
|
length, = struct.unpack(b'>B', raw[0])
|
||||||
|
raw = raw[1:1+length]
|
||||||
|
consumed = length+1
|
||||||
|
return raw.decode(codec), consumed
|
||||||
|
|
||||||
|
def decode_hex_number(raw, codec='utf-8'):
|
||||||
'''
|
'''
|
||||||
Return a variable length number encoded using hexadecimal encoding. These
|
Return a variable length number encoded using hexadecimal encoding. These
|
||||||
numbers have the first byte which tells the number of bytes that follow.
|
numbers have the first byte which tells the number of bytes that follow.
|
||||||
@ -25,13 +31,16 @@ def decode_hex_number(raw):
|
|||||||
:param raw: Raw binary data as a bytestring
|
:param raw: Raw binary data as a bytestring
|
||||||
|
|
||||||
:return: The number and the number of bytes from raw that the number
|
:return: The number and the number of bytes from raw that the number
|
||||||
occupies
|
occupies.
|
||||||
'''
|
'''
|
||||||
length, = struct.unpack(b'>B', raw[0])
|
raw, consumed = decode_string(raw, codec=codec)
|
||||||
raw = raw[1:1+length]
|
|
||||||
consumed = length+1
|
|
||||||
return int(raw, 16), consumed
|
return int(raw, 16), consumed
|
||||||
|
|
||||||
|
def encode_string(raw):
|
||||||
|
ans = bytearray(bytes(raw))
|
||||||
|
ans.insert(0, len(ans))
|
||||||
|
return bytes(ans)
|
||||||
|
|
||||||
def encode_number_as_hex(num):
|
def encode_number_as_hex(num):
|
||||||
'''
|
'''
|
||||||
Encode num as a variable length encoded hexadecimal number. Returns the
|
Encode num as a variable length encoded hexadecimal number. Returns the
|
||||||
@ -44,9 +53,7 @@ def encode_number_as_hex(num):
|
|||||||
nlen = len(num)
|
nlen = len(num)
|
||||||
if nlen % 2 != 0:
|
if nlen % 2 != 0:
|
||||||
num = b'0'+num
|
num = b'0'+num
|
||||||
ans = bytearray(num)
|
return encode_string(num)
|
||||||
ans.insert(0, len(num))
|
|
||||||
return bytes(ans)
|
|
||||||
|
|
||||||
def encint(value, forward=True):
|
def encint(value, forward=True):
|
||||||
'''
|
'''
|
||||||
@ -430,7 +437,7 @@ def read_font_record(data, extent=1040): # {{{
|
|||||||
# The zlib compressed data begins with 2 bytes of header and
|
# The zlib compressed data begins with 2 bytes of header and
|
||||||
# has 4 bytes of checksum at the end
|
# has 4 bytes of checksum at the end
|
||||||
ans = {'raw_data':data, 'font_data':None, 'err':None, 'ext':'failed',
|
ans = {'raw_data':data, 'font_data':None, 'err':None, 'ext':'failed',
|
||||||
'headers':None}
|
'headers':None, 'encrypted':False}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
usize, flags, dstart, xor_len, xor_start = struct.unpack_from(
|
usize, flags, dstart, xor_len, xor_start = struct.unpack_from(
|
||||||
@ -453,6 +460,7 @@ def read_font_record(data, extent=1040): # {{{
|
|||||||
buf[n] ^= key[n%xor_len] # XOR of buf and key
|
buf[n] ^= key[n%xor_len] # XOR of buf and key
|
||||||
|
|
||||||
font_data = bytes(buf)
|
font_data = bytes(buf)
|
||||||
|
ans['encrypted'] = True
|
||||||
|
|
||||||
if flags & 0b1:
|
if flags & 0b1:
|
||||||
# ZLIB compressed data
|
# ZLIB compressed data
|
||||||
|
@ -109,6 +109,7 @@ class RTFMLizer(object):
|
|||||||
if item.spine_position is None:
|
if item.spine_position is None:
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||||
self.opts, self.opts.output_profile)
|
self.opts, self.opts.output_profile)
|
||||||
|
self.currently_dumping_item = item
|
||||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||||
output += '{\\page }'
|
output += '{\\page }'
|
||||||
for item in self.oeb_book.spine:
|
for item in self.oeb_book.spine:
|
||||||
@ -118,6 +119,7 @@ class RTFMLizer(object):
|
|||||||
content = self.remove_tabs(content)
|
content = self.remove_tabs(content)
|
||||||
content = etree.fromstring(content)
|
content = etree.fromstring(content)
|
||||||
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||||
|
self.currently_dumping_item = item
|
||||||
output += self.dump_text(content.find(XHTML('body')), stylizer)
|
output += self.dump_text(content.find(XHTML('body')), stylizer)
|
||||||
output += '{\\page }'
|
output += '{\\page }'
|
||||||
output += self.footer()
|
output += self.footer()
|
||||||
@ -160,9 +162,15 @@ class RTFMLizer(object):
|
|||||||
|
|
||||||
for item in self.oeb_book.manifest:
|
for item in self.oeb_book.manifest:
|
||||||
if item.media_type in OEB_RASTER_IMAGES:
|
if item.media_type in OEB_RASTER_IMAGES:
|
||||||
src = os.path.basename(item.href)
|
src = item.href
|
||||||
data, width, height = self.image_to_hexstring(item.data)
|
try:
|
||||||
text = text.replace('SPECIAL_IMAGE-%s-REPLACE_ME' % src, '\n\n{\\*\\shppict{\\pict\\picw%i\\pich%i\\jpegblip \n%s\n}}\n\n' % (width, height, data))
|
data, width, height = self.image_to_hexstring(item.data)
|
||||||
|
except:
|
||||||
|
self.log.warn('Image %s is corrupted, ignoring'%item.href)
|
||||||
|
repl = '\n\n'
|
||||||
|
else:
|
||||||
|
repl = '\n\n{\\*\\shppict{\\pict\\jpegblip\\picw%i\\pich%i \n%s\n}}\n\n' % (width, height, data)
|
||||||
|
text = text.replace('SPECIAL_IMAGE-%s-REPLACE_ME' % src, repl)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def image_to_hexstring(self, data):
|
def image_to_hexstring(self, data):
|
||||||
@ -205,7 +213,8 @@ class RTFMLizer(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def dump_text(self, elem, stylizer, tag_stack=[]):
|
def dump_text(self, elem, stylizer, tag_stack=[]):
|
||||||
from calibre.ebooks.oeb.base import XHTML_NS, namespace, barename
|
from calibre.ebooks.oeb.base import (XHTML_NS, namespace, barename,
|
||||||
|
urlnormalize)
|
||||||
|
|
||||||
if not isinstance(elem.tag, basestring) \
|
if not isinstance(elem.tag, basestring) \
|
||||||
or namespace(elem.tag) != XHTML_NS:
|
or namespace(elem.tag) != XHTML_NS:
|
||||||
@ -234,13 +243,15 @@ class RTFMLizer(object):
|
|||||||
# Process tags that need special processing and that do not have inner
|
# Process tags that need special processing and that do not have inner
|
||||||
# text. Usually these require an argument
|
# text. Usually these require an argument
|
||||||
if tag == 'img':
|
if tag == 'img':
|
||||||
src = os.path.basename(elem.get('src'))
|
src = elem.get('src')
|
||||||
block_start = ''
|
if src:
|
||||||
block_end = ''
|
src = urlnormalize(self.currently_dumping_item.abshref(src))
|
||||||
if 'block' not in tag_stack:
|
block_start = ''
|
||||||
block_start = '{\\par\\pard\\hyphpar '
|
block_end = ''
|
||||||
block_end = '}'
|
if 'block' not in tag_stack:
|
||||||
text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
|
block_start = '{\\par\\pard\\hyphpar '
|
||||||
|
block_end = '}'
|
||||||
|
text += '%s SPECIAL_IMAGE-%s-REPLACE_ME %s' % (block_start, src, block_end)
|
||||||
|
|
||||||
single_tag = SINGLE_TAGS.get(tag, None)
|
single_tag = SINGLE_TAGS.get(tag, None)
|
||||||
if single_tag:
|
if single_tag:
|
||||||
|
@ -70,6 +70,9 @@ class AddAction(InterfaceAction):
|
|||||||
self.add_menu.addSeparator()
|
self.add_menu.addSeparator()
|
||||||
ma('add-formats', _('Add files to selected book records'),
|
ma('add-formats', _('Add files to selected book records'),
|
||||||
triggered=self.add_formats, shortcut=_('Shift+A'))
|
triggered=self.add_formats, shortcut=_('Shift+A'))
|
||||||
|
self.add_menu.addSeparator()
|
||||||
|
ma('add-config', _('Configure the adding of books'),
|
||||||
|
triggered=self.add_config)
|
||||||
|
|
||||||
self.qaction.triggered.connect(self.add_books)
|
self.qaction.triggered.connect(self.add_books)
|
||||||
|
|
||||||
@ -78,6 +81,11 @@ class AddAction(InterfaceAction):
|
|||||||
for action in list(self.add_menu.actions())[1:]:
|
for action in list(self.add_menu.actions())[1:]:
|
||||||
action.setEnabled(enabled)
|
action.setEnabled(enabled)
|
||||||
|
|
||||||
|
def add_config(self):
|
||||||
|
self.gui.iactions['Preferences'].do_config(
|
||||||
|
initial_plugin=('Import/Export', 'Adding'),
|
||||||
|
close_after_initial=True)
|
||||||
|
|
||||||
def add_formats(self, *args):
|
def add_formats(self, *args):
|
||||||
if self.gui.stack.currentIndex() != 0:
|
if self.gui.stack.currentIndex() != 0:
|
||||||
return
|
return
|
||||||
|
@ -13,6 +13,7 @@ from calibre.gui2 import choose_dir, error_dialog, warning_dialog
|
|||||||
from calibre.gui2.tools import generate_catalog
|
from calibre.gui2.tools import generate_catalog
|
||||||
from calibre.utils.config import dynamic
|
from calibre.utils.config import dynamic
|
||||||
from calibre.gui2.actions import InterfaceAction
|
from calibre.gui2.actions import InterfaceAction
|
||||||
|
from calibre import sanitize_file_name_unicode
|
||||||
|
|
||||||
class GenerateCatalogAction(InterfaceAction):
|
class GenerateCatalogAction(InterfaceAction):
|
||||||
|
|
||||||
@ -89,7 +90,8 @@ class GenerateCatalogAction(InterfaceAction):
|
|||||||
_('Select destination for %(title)s.%(fmt)s') % dict(
|
_('Select destination for %(title)s.%(fmt)s') % dict(
|
||||||
title=job.catalog_title, fmt=job.fmt.lower()))
|
title=job.catalog_title, fmt=job.fmt.lower()))
|
||||||
if export_dir:
|
if export_dir:
|
||||||
destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
|
destination = os.path.join(export_dir, '%s.%s' % (
|
||||||
|
sanitize_file_name_unicode(job.catalog_title), job.fmt.lower()))
|
||||||
shutil.copyfile(job.catalog_file_path, destination)
|
shutil.copyfile(job.catalog_file_path, destination)
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,7 +13,8 @@ from contextlib import closing
|
|||||||
from PyQt4.Qt import QToolButton
|
from PyQt4.Qt import QToolButton
|
||||||
|
|
||||||
from calibre.gui2.actions import InterfaceAction
|
from calibre.gui2.actions import InterfaceAction
|
||||||
from calibre.gui2 import error_dialog, Dispatcher, warning_dialog, gprefs
|
from calibre.gui2 import (error_dialog, Dispatcher, warning_dialog, gprefs,
|
||||||
|
info_dialog)
|
||||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||||
from calibre.utils.config import prefs, tweaks
|
from calibre.utils.config import prefs, tweaks
|
||||||
from calibre.utils.date import now
|
from calibre.utils.date import now
|
||||||
@ -30,6 +31,7 @@ class Worker(Thread): # {{{
|
|||||||
self.progress = progress
|
self.progress = progress
|
||||||
self.done = done
|
self.done = done
|
||||||
self.delete_after = delete_after
|
self.delete_after = delete_after
|
||||||
|
self.auto_merged_ids = {}
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
try:
|
try:
|
||||||
@ -79,6 +81,8 @@ class Worker(Thread): # {{{
|
|||||||
if prefs['add_formats_to_existing']:
|
if prefs['add_formats_to_existing']:
|
||||||
identical_book_list = newdb.find_identical_books(mi)
|
identical_book_list = newdb.find_identical_books(mi)
|
||||||
if identical_book_list: # books with same author and nearly same title exist in newdb
|
if identical_book_list: # books with same author and nearly same title exist in newdb
|
||||||
|
self.auto_merged_ids[x] = _('%s by %s')%(mi.title,
|
||||||
|
mi.format_field('authors')[1])
|
||||||
automerged = True
|
automerged = True
|
||||||
seen_fmts = set()
|
seen_fmts = set()
|
||||||
for identical_book in identical_book_list:
|
for identical_book in identical_book_list:
|
||||||
@ -196,6 +200,15 @@ class CopyToLibraryAction(InterfaceAction):
|
|||||||
self.gui.status_bar.show_message(
|
self.gui.status_bar.show_message(
|
||||||
_('Copied %(num)d books to %(loc)s') %
|
_('Copied %(num)d books to %(loc)s') %
|
||||||
dict(num=len(ids), loc=loc), 2000)
|
dict(num=len(ids), loc=loc), 2000)
|
||||||
|
if self.worker.auto_merged_ids:
|
||||||
|
books = '\n'.join(self.worker.auto_merged_ids.itervalues())
|
||||||
|
info_dialog(self.gui, _('Auto merged'),
|
||||||
|
_('Some books were automatically merged into existing '
|
||||||
|
'records in the target library. Click Show '
|
||||||
|
'details to see which ones. This behavior is '
|
||||||
|
'controlled by the Auto merge option in '
|
||||||
|
'Preferences->Adding books.'), det_msg=books,
|
||||||
|
show=True)
|
||||||
if delete_after and self.worker.processed:
|
if delete_after and self.worker.processed:
|
||||||
v = self.gui.library_view
|
v = self.gui.library_view
|
||||||
ci = v.currentIndex()
|
ci = v.currentIndex()
|
||||||
|
@ -5,4 +5,3 @@ or asked not to be included in the store integration.
|
|||||||
* Indigo (http://www.chapters.indigo.ca/).
|
* Indigo (http://www.chapters.indigo.ca/).
|
||||||
* Libraria Rizzoli (http://libreriarizzoli.corriere.it/).
|
* Libraria Rizzoli (http://libreriarizzoli.corriere.it/).
|
||||||
* EPubBuy DE: reason: too much traffic for too little sales
|
* EPubBuy DE: reason: too much traffic for too little sales
|
||||||
* Empik (http://empik.com.pl).
|
|
||||||
|
@ -41,7 +41,9 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -65,8 +67,8 @@ class AmazonDEKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
|
|
||||||
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
||||||
if author.startswith('von '):
|
if author.startswith('von '):
|
||||||
|
@ -37,7 +37,9 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -61,8 +63,8 @@ class AmazonESKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
||||||
if author.startswith('de '):
|
if author.startswith('de '):
|
||||||
author = author[3:]
|
author = author[3:]
|
||||||
|
@ -39,7 +39,7 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
# Apparently amazon.fr is responding in UTF-8 now
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
doc = html.fromstring(f.read())
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
@ -64,8 +64,8 @@ class AmazonFRKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
||||||
if author.startswith('de '):
|
if author.startswith('de '):
|
||||||
author = author[3:]
|
author = author[3:]
|
||||||
|
@ -37,7 +37,9 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
||||||
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -61,8 +63,8 @@ class AmazonITKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
|
||||||
if author.startswith('di '):
|
if author.startswith('di '):
|
||||||
author = author[3:]
|
author = author[3:]
|
||||||
|
@ -38,7 +38,8 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read().decode('latin-1', 'replace'))
|
# Apparently amazon Europe is responding in UTF-8 now
|
||||||
|
doc = html.fromstring(f.read())
|
||||||
|
|
||||||
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
|
||||||
format_xpath = './/span[@class="format"]/text()'
|
format_xpath = './/span[@class="format"]/text()'
|
||||||
@ -62,8 +63,8 @@ class AmazonUKKindleStore(StorePlugin):
|
|||||||
|
|
||||||
cover_url = ''.join(data.xpath(cover_xpath))
|
cover_url = ''.join(data.xpath(cover_xpath))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
|
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
|
||||||
|
|
||||||
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
|
||||||
if author.startswith('by '):
|
if author.startswith('by '):
|
||||||
|
@ -62,7 +62,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
|
|||||||
|
|
||||||
title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
|
title = ''.join(data.xpath('.//p[@class="title"]//span[@class="name"]/text()'))
|
||||||
author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
|
author = ', '.join(data.xpath('.//ul[@class="contributors"]//li[position()>1]//a/text()'))
|
||||||
price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[@class="subtle"]/text()'))
|
price = ''.join(data.xpath('.//table[@class="displayed-formats"]//a[contains(@class, "bn-price")]/text()'))
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import urllib2
|
import urllib
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
@ -33,7 +33,7 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
|
|
||||||
detail_url = None
|
detail_url = None
|
||||||
if detail_item:
|
if detail_item:
|
||||||
detail_url = url + detail_item + aff_id
|
detail_url = detail_item + aff_id
|
||||||
url = url + aff_id
|
url = url + aff_id
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
if external or self.config.get('open_external', False):
|
||||||
@ -45,33 +45,36 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
d.exec_()
|
d.exec_()
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib2.quote(query)
|
url = 'http://www.diesel-ebooks.com/index.php?page=seek&id[m]=&id[c]=scope%253Dinventory&id[q]=' + urllib.quote_plus(query)
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
doc = html.fromstring(f.read())
|
doc = html.fromstring(f.read())
|
||||||
for data in doc.xpath('//div[@class="item clearfix"]'):
|
for data in doc.xpath('//div[contains(@class, "item")]'):
|
||||||
data = html.fromstring(html.tostring(data))
|
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
id = ''.join(data.xpath('div[@class="cover"]/a/@href'))
|
id = ''.join(data.xpath('div[@class="cover"]/a/@href'))
|
||||||
if not id or '/item/' not in id:
|
if not id or '/item/' not in id:
|
||||||
continue
|
continue
|
||||||
a, b, id = id.partition('/item/')
|
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
|
cover_url = ''.join(data.xpath('div[@class="cover"]//img/@src'))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="content"]//h2/text()'))
|
title = ''.join(data.xpath('.//div[@class="content"]//h2/a/text()'))
|
||||||
author = ''.join(data.xpath('//div[@class="content"]//div[@class="author"]/a/text()'))
|
author = ''.join(data.xpath('.//div[@class="content"]/span//a/text()'))
|
||||||
price = ''
|
price = ''
|
||||||
price_elem = data.xpath('//td[@class="price"]/text()')
|
price_elem = data.xpath('.//div[@class="price_fat"]//h1/text()')
|
||||||
if price_elem:
|
if price_elem:
|
||||||
price = price_elem[0]
|
price = price_elem[0]
|
||||||
|
|
||||||
formats = ', '.join(data.xpath('.//td[@class="format"]/text()'))
|
formats = ', '.join(data.xpath('.//div[@class="book-info"]//text()')).strip()
|
||||||
|
a, b, formats = formats.partition('Format:')
|
||||||
|
drm = SearchResult.DRM_LOCKED
|
||||||
|
if 'drm free' not in formats.lower():
|
||||||
|
drm = SearchResult.DRM_UNLOCKED
|
||||||
|
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
@ -80,19 +83,8 @@ class DieselEbooksStore(BasicStoreConfig, StorePlugin):
|
|||||||
s.title = title.strip()
|
s.title = title.strip()
|
||||||
s.author = author.strip()
|
s.author = author.strip()
|
||||||
s.price = price.strip()
|
s.price = price.strip()
|
||||||
s.detail_item = '/item/' + id.strip()
|
s.detail_item = id.strip()
|
||||||
s.formats = formats
|
s.formats = formats
|
||||||
|
s.drm = drm
|
||||||
|
|
||||||
yield s
|
yield s
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
|
||||||
url = 'http://www.diesel-ebooks.com/item/'
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
with closing(br.open(url + search_result.detail_item, timeout=timeout)) as nf:
|
|
||||||
idata = html.fromstring(nf.read())
|
|
||||||
if idata.xpath('boolean(//table[@class="format-info"]//tr[contains(th, "DRM") and contains(td, "No")])'):
|
|
||||||
search_result.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
else:
|
|
||||||
search_result.drm = SearchResult.DRM_LOCKED
|
|
||||||
return True
|
|
||||||
|
@ -60,10 +60,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('.//a[@class="Jacket"]/img/@src'))
|
cover_url = ''.join(data.xpath('.//a[@class="Jacket"]/img/@src'))
|
||||||
if cover_url:
|
|
||||||
cover_url = 'http://www.foyles.co.uk' + cover_url
|
|
||||||
#print(cover_url)
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
|
title = ''.join(data.xpath('.//a[@class="Title"]/text()'))
|
||||||
author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
|
author = ', '.join(data.xpath('.//span[@class="Author"]/text()'))
|
||||||
price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
|
price = ''.join(data.xpath('./ul/li[@class="Strong"]/text()'))
|
||||||
|
@ -68,7 +68,7 @@ class KoboStore(BasicStoreConfig, StorePlugin):
|
|||||||
cover_url = ''.join(data.xpath('.//div[@class="SearchImageContainer"]//img[1]/@src'))
|
cover_url = ''.join(data.xpath('.//div[@class="SearchImageContainer"]//img[1]/@src'))
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
|
title = ''.join(data.xpath('.//div[@class="SCItemHeader"]/h1/a[1]/text()'))
|
||||||
author = ''.join(data.xpath('.//div[@class="SCItemSummary"]/span/a[1]/text()'))
|
author = ', '.join(data.xpath('.//div[@class="SCItemSummary"]//span//a/text()'))
|
||||||
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
|
drm = data.xpath('boolean(.//span[@class="SCAvailibilityFormatsText" and contains(text(), "DRM")])')
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
|
__copyright__ = '2011-2012, Tomasz Długosz <tomek3d@gmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -47,41 +47,47 @@ class NextoStore(BasicStoreConfig, StorePlugin):
|
|||||||
url = 'http://www.nexto.pl/szukaj.xml?search-clause=' + urllib.quote_plus(query) + '&scid=1015'
|
url = 'http://www.nexto.pl/szukaj.xml?search-clause=' + urllib.quote_plus(query) + '&scid=1015'
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
|
offset=0
|
||||||
|
|
||||||
counter = max_results
|
counter = max_results
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
|
||||||
doc = html.fromstring(f.read())
|
while counter:
|
||||||
for data in doc.xpath('//ul[@class="productslist"]/li'):
|
with closing(br.open(url + '&_offset=' + str(offset), timeout=timeout)) as f:
|
||||||
if counter <= 0:
|
doc = html.fromstring(f.read())
|
||||||
|
for data in doc.xpath('//ul[@class="productslist"]/li'):
|
||||||
|
if counter <= 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
id = ''.join(data.xpath('.//div[@class="cover_container"]/a[1]/@href'))
|
||||||
|
if not id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
price = ''.join(data.xpath('.//strong[@class="nprice"]/text()'))
|
||||||
|
|
||||||
|
cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))
|
||||||
|
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
||||||
|
title = re.sub(r' - ebook$', '', title)
|
||||||
|
formats = ', '.join(data.xpath('.//ul[@class="formats_available"]/li//b/text()'))
|
||||||
|
DrmFree = re.search(r'bez.DRM', formats)
|
||||||
|
formats = re.sub(r'\(.+\)', '', formats)
|
||||||
|
|
||||||
|
author = ''
|
||||||
|
with closing(br.open('http://www.nexto.pl/' + id.strip(), timeout=timeout/4)) as nf:
|
||||||
|
idata = html.fromstring(nf.read())
|
||||||
|
author = ', '.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()'))
|
||||||
|
|
||||||
|
counter -= 1
|
||||||
|
|
||||||
|
s = SearchResult()
|
||||||
|
s.cover_url = cover_url
|
||||||
|
s.title = title.strip()
|
||||||
|
s.author = author.strip()
|
||||||
|
s.price = price
|
||||||
|
s.detail_item = id.strip()
|
||||||
|
s.drm = SearchResult.DRM_UNLOCKED if DrmFree else SearchResult.DRM_LOCKED
|
||||||
|
s.formats = formats.upper().strip()
|
||||||
|
|
||||||
|
yield s
|
||||||
|
if not doc.xpath('//div[@class="listnavigator"]//a[@class="next"]'):
|
||||||
break
|
break
|
||||||
|
offset+=10
|
||||||
id = ''.join(data.xpath('.//div[@class="cover_container"]/a[1]/@href'))
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
price = ''.join(data.xpath('.//strong[@class="nprice"]/text()'))
|
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))
|
|
||||||
title = ''.join(data.xpath('.//a[@class="title"]/text()'))
|
|
||||||
title = re.sub(r' - ebook$', '', title)
|
|
||||||
formats = ', '.join(data.xpath('.//ul[@class="formats_available"]/li//b/text()'))
|
|
||||||
DrmFree = re.search(r'bez.DRM', formats)
|
|
||||||
formats = re.sub(r'\(.+\)', '', formats)
|
|
||||||
|
|
||||||
author = ''
|
|
||||||
with closing(br.open('http://www.nexto.pl/' + id.strip(), timeout=timeout/4)) as nf:
|
|
||||||
idata = html.fromstring(nf.read())
|
|
||||||
author = ', '.join(idata.xpath('//div[@class="basic_data"]/p[1]/b/a/text()'))
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.cover_url = cover_url
|
|
||||||
s.title = title.strip()
|
|
||||||
s.author = author.strip()
|
|
||||||
s.price = price
|
|
||||||
s.detail_item = id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED if DrmFree else SearchResult.DRM_LOCKED
|
|
||||||
s.formats = formats.upper().strip()
|
|
||||||
|
|
||||||
yield s
|
|
||||||
|
@ -57,7 +57,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
|
|||||||
cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
|
cover_url = ''.join(data.xpath('.//div[@class="image"]/a/img/@src'))
|
||||||
title = ''.join(data.xpath('./div/div/h2/a/text()'))
|
title = ''.join(data.xpath('./div/div/h2/a/text()'))
|
||||||
author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
|
author = ', '.join(data.xpath('.//p[@class="byAuthor"]/a/text()'))
|
||||||
price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceStandard"]/text()'))
|
price = ''.join(data.xpath('.//p[@class="price"]/span[@class="priceRed2"]/text()'))
|
||||||
drm = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "DRM")])')
|
drm = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "DRM")])')
|
||||||
pdf = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "PDF")])')
|
pdf = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "PDF")])')
|
||||||
epub = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "EPUB")])')
|
epub = data.xpath('boolean(.//td[@headers="productFormat" and contains(., "EPUB")])')
|
||||||
|
@ -1,118 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
import urllib
|
|
||||||
from contextlib import closing
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from PyQt4.Qt import QUrl
|
|
||||||
|
|
||||||
from calibre import browser, url_slash_cleaner
|
|
||||||
from calibre.gui2 import open_url
|
|
||||||
from calibre.gui2.store import StorePlugin
|
|
||||||
from calibre.gui2.store.basic_config import BasicStoreConfig
|
|
||||||
from calibre.gui2.store.search_result import SearchResult
|
|
||||||
from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|
||||||
|
|
||||||
class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
|
|
||||||
|
|
||||||
url = 'http://www.wizardstowerbooks.com/'
|
|
||||||
|
|
||||||
def open(self, parent=None, detail_item=None, external=False):
|
|
||||||
if detail_item:
|
|
||||||
detail_item = self.url + detail_item
|
|
||||||
|
|
||||||
if external or self.config.get('open_external', False):
|
|
||||||
open_url(QUrl(url_slash_cleaner(detail_item)))
|
|
||||||
else:
|
|
||||||
d = WebStoreDialog(self.gui, self.url, parent, detail_item)
|
|
||||||
d.setWindowTitle(self.name)
|
|
||||||
d.set_tags(self.config.get('tags', ''))
|
|
||||||
d.exec_()
|
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
|
||||||
url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
|
|
||||||
counter = max_results
|
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
|
||||||
doc = html.fromstring(f.read())
|
|
||||||
if 'search.html' in f.geturl():
|
|
||||||
for data in doc.xpath('//table[@class="gridp"]//td'):
|
|
||||||
if counter <= 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
id = ''.join(data.xpath('.//span[@class="prti"]/a/@href'))
|
|
||||||
id = id.strip()
|
|
||||||
if not id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
cover_url = ''.join(data.xpath('.//div[@class="prim"]/a/img/@src'))
|
|
||||||
cover_url = url_slash_cleaner(self.url + cover_url.strip())
|
|
||||||
|
|
||||||
price = ''.join(data.xpath('.//font[@class="selling_price"]//text()'))
|
|
||||||
price = price.strip()
|
|
||||||
if not price:
|
|
||||||
continue
|
|
||||||
|
|
||||||
title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
|
|
||||||
author = ''.join(data.xpath('.//p[@class="last"]/text()'))
|
|
||||||
a, b, author = author.partition(' by ')
|
|
||||||
|
|
||||||
counter -= 1
|
|
||||||
|
|
||||||
s = SearchResult()
|
|
||||||
s.cover_url = cover_url
|
|
||||||
s.title = title.strip()
|
|
||||||
s.author = author.strip()
|
|
||||||
s.price = price.strip()
|
|
||||||
s.detail_item = id.strip()
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
|
|
||||||
yield s
|
|
||||||
# Exact match brought us to the books detail page.
|
|
||||||
else:
|
|
||||||
s = SearchResult()
|
|
||||||
|
|
||||||
cover_url = ''.join(doc.xpath('//div[@id="image"]/a/img[@title="Zoom"]/@src')).strip()
|
|
||||||
s.cover_url = url_slash_cleaner(self.url + cover_url.strip())
|
|
||||||
|
|
||||||
s.title = ''.join(doc.xpath('//form[@name="details"]/h1/text()')).strip()
|
|
||||||
|
|
||||||
authors = doc.xpath('//p[contains(., "Author:")]//text()')
|
|
||||||
author_index = None
|
|
||||||
for i, a in enumerate(authors):
|
|
||||||
if 'author' in a.lower():
|
|
||||||
author_index = i + 1
|
|
||||||
break
|
|
||||||
if author_index is not None and len(authors) > author_index:
|
|
||||||
a = authors[author_index]
|
|
||||||
a = a.replace(u'\xa0', '')
|
|
||||||
s.author = a.strip()
|
|
||||||
|
|
||||||
s.price = ''.join(doc.xpath('//span[@id="price_selling"]//text()')).strip()
|
|
||||||
s.detail_item = f.geturl().replace(self.url, '').strip()
|
|
||||||
s.formats = ', '.join(doc.xpath('//select[@id="N1_"]//option//text()'))
|
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
|
||||||
|
|
||||||
yield s
|
|
||||||
|
|
||||||
def get_details(self, search_result, timeout):
|
|
||||||
if search_result.formats:
|
|
||||||
return False
|
|
||||||
|
|
||||||
br = browser()
|
|
||||||
with closing(br.open(url_slash_cleaner(self.url + search_result.detail_item), timeout=timeout)) as nf:
|
|
||||||
idata = html.fromstring(nf.read())
|
|
||||||
|
|
||||||
formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
|
|
||||||
search_result.formats = formats.upper()
|
|
||||||
|
|
||||||
return True
|
|
@ -3,7 +3,7 @@
|
|||||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
|
__copyright__ = '2011-2012, Tomasz Długosz <tomek3d@gmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -41,6 +41,11 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
url = 'http://woblink.com/publication?query=' + urllib.quote_plus(query.encode('utf-8'))
|
url = 'http://woblink.com/publication?query=' + urllib.quote_plus(query.encode('utf-8'))
|
||||||
|
if max_results > 10:
|
||||||
|
if max_results > 20:
|
||||||
|
url += '&limit=' + str(30)
|
||||||
|
else:
|
||||||
|
url += '&limit=' + str(20)
|
||||||
|
|
||||||
br = browser()
|
br = browser()
|
||||||
|
|
||||||
@ -58,15 +63,16 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
|
|||||||
cover_url = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/img/@src'))
|
cover_url = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/img/@src'))
|
||||||
title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
|
title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
|
||||||
author = ', '.join(data.xpath('.//p[@class="author"]/a/text()'))
|
author = ', '.join(data.xpath('.//p[@class="author"]/a/text()'))
|
||||||
price = ''.join(data.xpath('.//div[@class="prices"]/p[1]/span/text()'))
|
price = ''.join(data.xpath('.//div[@class="prices"]/span[1]/span/text()'))
|
||||||
price = re.sub('PLN', ' zł', price)
|
|
||||||
price = re.sub('\.', ',', price)
|
price = re.sub('\.', ',', price)
|
||||||
formats = ', '.join(data.xpath('.//p[3]/img/@src'))
|
formats = [ form[8:-4].split('_')[0] for form in data.xpath('.//p[3]/img/@src')]
|
||||||
formats = formats[8:-4].upper()
|
if 'epub' in formats:
|
||||||
if formats == 'EPUB':
|
formats.remove('epub')
|
||||||
formats = 'WOBLINK'
|
formats.append('WOBLINK')
|
||||||
if 'E Ink' in data.xpath('.//div[@class="prices"]/img/@title'):
|
if 'E Ink' in data.xpath('.//div[@class="prices"]/img/@title'):
|
||||||
formats += ', EPUB'
|
formats.insert(0, 'EPUB')
|
||||||
|
if 'pdf' in formats:
|
||||||
|
formats[formats.index('pdf')] = 'PDF'
|
||||||
|
|
||||||
counter -= 1
|
counter -= 1
|
||||||
|
|
||||||
@ -74,9 +80,9 @@ class WoblinkStore(BasicStoreConfig, StorePlugin):
|
|||||||
s.cover_url = 'http://woblink.com' + cover_url
|
s.cover_url = 'http://woblink.com' + cover_url
|
||||||
s.title = title.strip()
|
s.title = title.strip()
|
||||||
s.author = author.strip()
|
s.author = author.strip()
|
||||||
s.price = price
|
s.price = price + ' zł'
|
||||||
s.detail_item = id.strip()
|
s.detail_item = id.strip()
|
||||||
s.drm = SearchResult.DRM_LOCKED
|
s.drm = SearchResult.DRM_UNKNOWN if 'MOBI' in formats else SearchResult.DRM_LOCKED
|
||||||
s.formats = formats
|
s.formats = ', '.join(formats)
|
||||||
|
|
||||||
yield s
|
yield s
|
||||||
|
@ -1170,6 +1170,8 @@ class TagsModel(QAbstractItemModel): # {{{
|
|||||||
charclass = ''.join(letters_seen)
|
charclass = ''.join(letters_seen)
|
||||||
if k == 'author_sort':
|
if k == 'author_sort':
|
||||||
expr = r'%s:"~(^[%s])|(&\s*[%s])"'%(k, charclass, charclass)
|
expr = r'%s:"~(^[%s])|(&\s*[%s])"'%(k, charclass, charclass)
|
||||||
|
elif k == 'series':
|
||||||
|
expr = r'series_sort:"~^[%s]"'%(charclass)
|
||||||
else:
|
else:
|
||||||
expr = r'%s:"~^[%s]"'%(k, charclass)
|
expr = r'%s:"~^[%s]"'%(k, charclass)
|
||||||
if node_searches[tag_item.tag.state] == 'true':
|
if node_searches[tag_item.tag.state] == 'true':
|
||||||
|
@ -255,7 +255,10 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="1">
|
<item row="3" column="1">
|
||||||
<widget class="QSpinBox" name="max_view_width">
|
<widget class="QSpinBox" name="max_fs_width">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string>Set the maximum width that the book's text and pictures will take when in fullscreen mode. This allows you to read the book text without it becoming too wide.</string>
|
||||||
|
</property>
|
||||||
<property name="suffix">
|
<property name="suffix">
|
||||||
<string> px</string>
|
<string> px</string>
|
||||||
</property>
|
</property>
|
||||||
@ -270,10 +273,10 @@
|
|||||||
<item row="3" column="0">
|
<item row="3" column="0">
|
||||||
<widget class="QLabel" name="label_7">
|
<widget class="QLabel" name="label_7">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Maximum &view width:</string>
|
<string>Maximum text width in &fullscreen:</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="buddy">
|
<property name="buddy">
|
||||||
<cstring>max_view_width</cstring>
|
<cstring>max_fs_width</cstring>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
@ -350,7 +353,7 @@
|
|||||||
<tabstop>serif_family</tabstop>
|
<tabstop>serif_family</tabstop>
|
||||||
<tabstop>sans_family</tabstop>
|
<tabstop>sans_family</tabstop>
|
||||||
<tabstop>mono_family</tabstop>
|
<tabstop>mono_family</tabstop>
|
||||||
<tabstop>max_view_width</tabstop>
|
<tabstop>max_fs_width</tabstop>
|
||||||
<tabstop>opt_remember_window_size</tabstop>
|
<tabstop>opt_remember_window_size</tabstop>
|
||||||
<tabstop>buttonBox</tabstop>
|
<tabstop>buttonBox</tabstop>
|
||||||
</tabstops>
|
</tabstops>
|
||||||
|
@ -8,11 +8,11 @@ import os, math, re, glob, sys, zipfile
|
|||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer,
|
from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt,
|
||||||
QPainter, QPalette, QBrush, QFontDatabase, QDialog,
|
QPainter, QPalette, QBrush, QFontDatabase, QDialog,
|
||||||
QColor, QPoint, QImage, QRegion, QVariant, QIcon,
|
QColor, QPoint, QImage, QRegion, QVariant, QIcon,
|
||||||
QFont, pyqtSignature, QAction, QByteArray, QMenu,
|
QFont, pyqtSignature, QAction, QByteArray, QMenu,
|
||||||
pyqtSignal, QSwipeGesture)
|
pyqtSignal, QSwipeGesture, QApplication)
|
||||||
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
|
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
|
||||||
|
|
||||||
from calibre.utils.config import Config, StringConfig
|
from calibre.utils.config import Config, StringConfig
|
||||||
@ -46,8 +46,10 @@ def config(defaults=None):
|
|||||||
help=_('Remember last used window size'))
|
help=_('Remember last used window size'))
|
||||||
c.add_opt('user_css', default='',
|
c.add_opt('user_css', default='',
|
||||||
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
|
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
|
||||||
c.add_opt('max_view_width', default=6000,
|
c.add_opt('max_fs_width', default=800,
|
||||||
help=_('Maximum width of the viewer window, in pixels.'))
|
help=_("Set the maximum width that the book's text and pictures will take"
|
||||||
|
" when in fullscreen mode. This allows you to read the book text"
|
||||||
|
" without it becoming too wide."))
|
||||||
c.add_opt('fit_images', default=True,
|
c.add_opt('fit_images', default=True,
|
||||||
help=_('Resize images larger than the viewer window to fit inside it'))
|
help=_('Resize images larger than the viewer window to fit inside it'))
|
||||||
c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
|
c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
|
||||||
@ -101,7 +103,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
self.standard_font.setCurrentIndex({'serif':0, 'sans':1, 'mono':2}[opts.standard_font])
|
self.standard_font.setCurrentIndex({'serif':0, 'sans':1, 'mono':2}[opts.standard_font])
|
||||||
self.css.setPlainText(opts.user_css)
|
self.css.setPlainText(opts.user_css)
|
||||||
self.css.setToolTip(_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
|
self.css.setToolTip(_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
|
||||||
self.max_view_width.setValue(opts.max_view_width)
|
self.max_fs_width.setValue(opts.max_fs_width)
|
||||||
with zipfile.ZipFile(P('viewer/hyphenate/patterns.zip',
|
with zipfile.ZipFile(P('viewer/hyphenate/patterns.zip',
|
||||||
allow_user_override=False), 'r') as zf:
|
allow_user_override=False), 'r') as zf:
|
||||||
pats = [x.split('.')[0].replace('-', '_') for x in zf.namelist()]
|
pats = [x.split('.')[0].replace('-', '_') for x in zf.namelist()]
|
||||||
@ -144,7 +146,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
|||||||
c.set('user_css', unicode(self.css.toPlainText()))
|
c.set('user_css', unicode(self.css.toPlainText()))
|
||||||
c.set('remember_window_size', self.opt_remember_window_size.isChecked())
|
c.set('remember_window_size', self.opt_remember_window_size.isChecked())
|
||||||
c.set('fit_images', self.opt_fit_images.isChecked())
|
c.set('fit_images', self.opt_fit_images.isChecked())
|
||||||
c.set('max_view_width', int(self.max_view_width.value()))
|
c.set('max_fs_width', int(self.max_fs_width.value()))
|
||||||
c.set('hyphenate', self.hyphenate.isChecked())
|
c.set('hyphenate', self.hyphenate.isChecked())
|
||||||
c.set('remember_current_page', self.opt_remember_current_page.isChecked())
|
c.set('remember_current_page', self.opt_remember_current_page.isChecked())
|
||||||
c.set('wheel_flips_pages', self.opt_wheel_flips_pages.isChecked())
|
c.set('wheel_flips_pages', self.opt_wheel_flips_pages.isChecked())
|
||||||
@ -182,16 +184,16 @@ class Document(QWebPage): # {{{
|
|||||||
self.misc_config()
|
self.misc_config()
|
||||||
self.after_load()
|
self.after_load()
|
||||||
|
|
||||||
def __init__(self, shortcuts, parent=None, resize_callback=lambda: None,
|
def __init__(self, shortcuts, parent=None, debug_javascript=False):
|
||||||
debug_javascript=False):
|
|
||||||
QWebPage.__init__(self, parent)
|
QWebPage.__init__(self, parent)
|
||||||
self.setObjectName("py_bridge")
|
self.setObjectName("py_bridge")
|
||||||
self.debug_javascript = debug_javascript
|
self.debug_javascript = debug_javascript
|
||||||
self.resize_callback = resize_callback
|
|
||||||
self.current_language = None
|
self.current_language = None
|
||||||
self.loaded_javascript = False
|
self.loaded_javascript = False
|
||||||
self.js_loader = JavaScriptLoader(
|
self.js_loader = JavaScriptLoader(
|
||||||
dynamic_coffeescript=self.debug_javascript)
|
dynamic_coffeescript=self.debug_javascript)
|
||||||
|
self.initial_left_margin = self.initial_right_margin = u''
|
||||||
|
self.in_fullscreen_mode = False
|
||||||
|
|
||||||
self.setLinkDelegationPolicy(self.DelegateAllLinks)
|
self.setLinkDelegationPolicy(self.DelegateAllLinks)
|
||||||
self.scroll_marks = []
|
self.scroll_marks = []
|
||||||
@ -239,6 +241,9 @@ class Document(QWebPage): # {{{
|
|||||||
self.enable_page_flip = self.page_flip_duration > 0.1
|
self.enable_page_flip = self.page_flip_duration > 0.1
|
||||||
self.font_magnification_step = opts.font_magnification_step
|
self.font_magnification_step = opts.font_magnification_step
|
||||||
self.wheel_flips_pages = opts.wheel_flips_pages
|
self.wheel_flips_pages = opts.wheel_flips_pages
|
||||||
|
screen_width = QApplication.desktop().screenGeometry().width()
|
||||||
|
# Leave some space for the scrollbar and some border
|
||||||
|
self.max_fs_width = min(opts.max_fs_width, screen_width-50)
|
||||||
|
|
||||||
def fit_images(self):
|
def fit_images(self):
|
||||||
if self.do_fit_images:
|
if self.do_fit_images:
|
||||||
@ -252,12 +257,6 @@ class Document(QWebPage): # {{{
|
|||||||
if self.loaded_javascript:
|
if self.loaded_javascript:
|
||||||
return
|
return
|
||||||
self.loaded_javascript = True
|
self.loaded_javascript = True
|
||||||
self.javascript(
|
|
||||||
'''
|
|
||||||
window.onresize = function(event) {
|
|
||||||
window.py_bridge.window_resized();
|
|
||||||
}
|
|
||||||
''')
|
|
||||||
self.loaded_lang = self.js_loader(self.mainFrame().evaluateJavaScript,
|
self.loaded_lang = self.js_loader(self.mainFrame().evaluateJavaScript,
|
||||||
self.current_language, self.hyphenate_default_lang)
|
self.current_language, self.hyphenate_default_lang)
|
||||||
|
|
||||||
@ -274,15 +273,35 @@ class Document(QWebPage): # {{{
|
|||||||
self.set_bottom_padding(0)
|
self.set_bottom_padding(0)
|
||||||
self.fit_images()
|
self.fit_images()
|
||||||
self.init_hyphenate()
|
self.init_hyphenate()
|
||||||
|
self.initial_left_margin = unicode(self.javascript(
|
||||||
|
'document.body.style.marginLeft').toString())
|
||||||
|
self.initial_right_margin = unicode(self.javascript(
|
||||||
|
'document.body.style.marginRight').toString())
|
||||||
|
if self.in_fullscreen_mode:
|
||||||
|
self.switch_to_fullscreen_mode()
|
||||||
|
|
||||||
|
def switch_to_fullscreen_mode(self):
|
||||||
|
self.in_fullscreen_mode = True
|
||||||
|
self.javascript('''
|
||||||
|
var s = document.body.style;
|
||||||
|
s.maxWidth = "%dpx";
|
||||||
|
s.marginLeft = "auto";
|
||||||
|
s.marginRight = "auto";
|
||||||
|
'''%self.max_fs_width)
|
||||||
|
|
||||||
|
def switch_to_window_mode(self):
|
||||||
|
self.in_fullscreen_mode = False
|
||||||
|
self.javascript('''
|
||||||
|
var s = document.body.style;
|
||||||
|
s.maxWidth = "none";
|
||||||
|
s.marginLeft = "%s";
|
||||||
|
s.marginRight = "%s";
|
||||||
|
'''%(self.initial_left_margin, self.initial_right_margin))
|
||||||
|
|
||||||
@pyqtSignature("QString")
|
@pyqtSignature("QString")
|
||||||
def debug(self, msg):
|
def debug(self, msg):
|
||||||
prints(msg)
|
prints(msg)
|
||||||
|
|
||||||
@pyqtSignature('')
|
|
||||||
def window_resized(self):
|
|
||||||
self.resize_callback()
|
|
||||||
|
|
||||||
def reference_mode(self, enable):
|
def reference_mode(self, enable):
|
||||||
self.javascript(('enter' if enable else 'leave')+'_reference_mode()')
|
self.javascript(('enter' if enable else 'leave')+'_reference_mode()')
|
||||||
|
|
||||||
@ -413,7 +432,7 @@ class Document(QWebPage): # {{{
|
|||||||
def scroll_fraction(self):
|
def scroll_fraction(self):
|
||||||
def fget(self):
|
def fget(self):
|
||||||
try:
|
try:
|
||||||
return float(self.ypos)/(self.height-self.window_height)
|
return abs(float(self.ypos)/(self.height-self.window_height))
|
||||||
except ZeroDivisionError:
|
except ZeroDivisionError:
|
||||||
return 0.
|
return 0.
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
@ -485,7 +504,6 @@ class DocumentView(QWebView): # {{{
|
|||||||
self.initial_pos = 0.0
|
self.initial_pos = 0.0
|
||||||
self.to_bottom = False
|
self.to_bottom = False
|
||||||
self.document = Document(self.shortcuts, parent=self,
|
self.document = Document(self.shortcuts, parent=self,
|
||||||
resize_callback=self.viewport_resized,
|
|
||||||
debug_javascript=debug_javascript)
|
debug_javascript=debug_javascript)
|
||||||
self.setPage(self.document)
|
self.setPage(self.document)
|
||||||
self.manager = None
|
self.manager = None
|
||||||
@ -581,8 +599,8 @@ class DocumentView(QWebView): # {{{
|
|||||||
|
|
||||||
def config(self, parent=None):
|
def config(self, parent=None):
|
||||||
self.document.do_config(parent)
|
self.document.do_config(parent)
|
||||||
if self.manager is not None:
|
if self.document.in_fullscreen_mode:
|
||||||
self.manager.set_max_width()
|
self.document.switch_to_fullscreen_mode()
|
||||||
self.setFocus(Qt.OtherFocusReason)
|
self.setFocus(Qt.OtherFocusReason)
|
||||||
|
|
||||||
def bookmark(self):
|
def bookmark(self):
|
||||||
@ -602,6 +620,9 @@ class DocumentView(QWebView): # {{{
|
|||||||
menu.insertAction(list(menu.actions())[0], self.search_action)
|
menu.insertAction(list(menu.actions())[0], self.search_action)
|
||||||
menu.addSeparator()
|
menu.addSeparator()
|
||||||
menu.addAction(self.goto_location_action)
|
menu.addAction(self.goto_location_action)
|
||||||
|
if self.document.in_fullscreen_mode and self.manager is not None:
|
||||||
|
menu.addSeparator()
|
||||||
|
menu.addAction(self.manager.toggle_toolbar_action)
|
||||||
menu.exec_(ev.globalPos())
|
menu.exec_(ev.globalPos())
|
||||||
|
|
||||||
def lookup(self, *args):
|
def lookup(self, *args):
|
||||||
@ -1001,13 +1022,9 @@ class DocumentView(QWebView): # {{{
|
|||||||
return handled
|
return handled
|
||||||
|
|
||||||
def resizeEvent(self, event):
|
def resizeEvent(self, event):
|
||||||
ret = QWebView.resizeEvent(self, event)
|
|
||||||
QTimer.singleShot(10, self.initialize_scrollbar)
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def viewport_resized(self):
|
|
||||||
if self.manager is not None:
|
if self.manager is not None:
|
||||||
self.manager.viewport_resized(self.scroll_fraction)
|
self.manager.viewport_resize_started(event)
|
||||||
|
return QWebView.resizeEvent(self, event)
|
||||||
|
|
||||||
def event(self, ev):
|
def event(self, ev):
|
||||||
if ev.type() == ev.Gesture:
|
if ev.type() == ev.Gesture:
|
||||||
|
@ -5,11 +5,11 @@ import traceback, os, sys, functools, collections, re
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
from PyQt4.Qt import QApplication, Qt, QIcon, QTimer, SIGNAL, QByteArray, \
|
from PyQt4.Qt import (QApplication, Qt, QIcon, QTimer, SIGNAL, QByteArray,
|
||||||
QDoubleSpinBox, QLabel, QTextBrowser, \
|
QSize, QDoubleSpinBox, QLabel, QTextBrowser, QPropertyAnimation,
|
||||||
QPainter, QBrush, QColor, QStandardItemModel, QPalette, \
|
QPainter, QBrush, QColor, QStandardItemModel, QPalette, QStandardItem,
|
||||||
QStandardItem, QUrl, QRegExpValidator, QRegExp, QLineEdit, \
|
QUrl, QRegExpValidator, QRegExp, QLineEdit, QToolButton, QMenu,
|
||||||
QToolButton, QMenu, QInputDialog, QAction, QKeySequence
|
QInputDialog, QAction, QKeySequence)
|
||||||
|
|
||||||
from calibre.gui2.viewer.main_ui import Ui_EbookViewer
|
from calibre.gui2.viewer.main_ui import Ui_EbookViewer
|
||||||
from calibre.gui2.viewer.printing import Printing
|
from calibre.gui2.viewer.printing import Printing
|
||||||
@ -55,8 +55,6 @@ class TOC(QStandardItemModel):
|
|||||||
self.appendRow(TOCItem(t))
|
self.appendRow(TOCItem(t))
|
||||||
self.setHorizontalHeaderItem(0, QStandardItem(_('Table of Contents')))
|
self.setHorizontalHeaderItem(0, QStandardItem(_('Table of Contents')))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Worker(Thread):
|
class Worker(Thread):
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
@ -226,6 +224,10 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
self.toc.setVisible(False)
|
self.toc.setVisible(False)
|
||||||
self.action_quit = QAction(self)
|
self.action_quit = QAction(self)
|
||||||
self.addAction(self.action_quit)
|
self.addAction(self.action_quit)
|
||||||
|
self.view_resized_timer = QTimer(self)
|
||||||
|
self.view_resized_timer.timeout.connect(self.viewport_resize_finished)
|
||||||
|
self.view_resized_timer.setSingleShot(True)
|
||||||
|
self.resize_in_progress = False
|
||||||
qs = [Qt.CTRL+Qt.Key_Q]
|
qs = [Qt.CTRL+Qt.Key_Q]
|
||||||
if isosx:
|
if isosx:
|
||||||
qs += [Qt.CTRL+Qt.Key_W]
|
qs += [Qt.CTRL+Qt.Key_W]
|
||||||
@ -266,6 +268,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
self.connect(self.action_full_screen, SIGNAL('triggered(bool)'),
|
self.connect(self.action_full_screen, SIGNAL('triggered(bool)'),
|
||||||
self.toggle_fullscreen)
|
self.toggle_fullscreen)
|
||||||
self.action_full_screen.setShortcuts([Qt.Key_F11, Qt.CTRL+Qt.SHIFT+Qt.Key_F])
|
self.action_full_screen.setShortcuts([Qt.Key_F11, Qt.CTRL+Qt.SHIFT+Qt.Key_F])
|
||||||
|
self.action_full_screen.setToolTip(_('Toggle full screen (%s)') %
|
||||||
|
_(' or ').join([unicode(x.toString(x.NativeText)) for x in
|
||||||
|
self.action_full_screen.shortcuts()]))
|
||||||
self.connect(self.action_back, SIGNAL('triggered(bool)'), self.back)
|
self.connect(self.action_back, SIGNAL('triggered(bool)'), self.back)
|
||||||
self.connect(self.action_bookmark, SIGNAL('triggered(bool)'), self.bookmark)
|
self.connect(self.action_bookmark, SIGNAL('triggered(bool)'), self.bookmark)
|
||||||
self.connect(self.action_forward, SIGNAL('triggered(bool)'), self.forward)
|
self.connect(self.action_forward, SIGNAL('triggered(bool)'), self.forward)
|
||||||
@ -292,6 +297,38 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu)
|
self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu)
|
||||||
self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.MenuButtonPopup)
|
self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.MenuButtonPopup)
|
||||||
self.action_full_screen.setCheckable(True)
|
self.action_full_screen.setCheckable(True)
|
||||||
|
self.full_screen_label = QLabel('''
|
||||||
|
<center>
|
||||||
|
<h1>%s</h1>
|
||||||
|
<h3>%s</h3>
|
||||||
|
<h3>%s</h3>
|
||||||
|
</center>
|
||||||
|
'''%(_('Full screen mode'),
|
||||||
|
_('Right click to show controls'),
|
||||||
|
_('Press Esc to quit')),
|
||||||
|
self)
|
||||||
|
self.full_screen_label.setVisible(False)
|
||||||
|
self.full_screen_label.setStyleSheet('''
|
||||||
|
QLabel {
|
||||||
|
text-align: center;
|
||||||
|
background-color: white;
|
||||||
|
color: black;
|
||||||
|
border-width: 1px;
|
||||||
|
border-style: solid;
|
||||||
|
border-radius: 20px;
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
self.window_mode_changed = None
|
||||||
|
self.toggle_toolbar_action = QAction(_('Show/hide controls'), self)
|
||||||
|
self.toggle_toolbar_action.triggered.connect(self.toggle_toolbars)
|
||||||
|
self.addAction(self.toggle_toolbar_action)
|
||||||
|
self.full_screen_label_anim = QPropertyAnimation(
|
||||||
|
self.full_screen_label, 'size')
|
||||||
|
self.esc_full_screen_action = a = QAction(self)
|
||||||
|
self.addAction(a)
|
||||||
|
a.setShortcut(Qt.Key_Escape)
|
||||||
|
a.setEnabled(False)
|
||||||
|
a.triggered.connect(self.action_full_screen.trigger)
|
||||||
|
|
||||||
self.print_menu = QMenu()
|
self.print_menu = QMenu()
|
||||||
self.print_menu.addAction(QIcon(I('print-preview.png')), _('Print Preview'))
|
self.print_menu.addAction(QIcon(I('print-preview.png')), _('Print Preview'))
|
||||||
@ -299,7 +336,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
self.tool_bar.widgetForAction(self.action_print).setPopupMode(QToolButton.MenuButtonPopup)
|
self.tool_bar.widgetForAction(self.action_print).setPopupMode(QToolButton.MenuButtonPopup)
|
||||||
self.connect(self.action_print, SIGNAL("triggered(bool)"), partial(self.print_book, preview=False))
|
self.connect(self.action_print, SIGNAL("triggered(bool)"), partial(self.print_book, preview=False))
|
||||||
self.connect(self.print_menu.actions()[0], SIGNAL("triggered(bool)"), partial(self.print_book, preview=True))
|
self.connect(self.print_menu.actions()[0], SIGNAL("triggered(bool)"), partial(self.print_book, preview=True))
|
||||||
self.set_max_width()
|
|
||||||
ca = self.view.copy_action
|
ca = self.view.copy_action
|
||||||
ca.setShortcut(QKeySequence.Copy)
|
ca.setShortcut(QKeySequence.Copy)
|
||||||
self.addAction(ca)
|
self.addAction(ca)
|
||||||
@ -313,6 +349,13 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
w = self.tool_bar.widgetForAction(self.action_open_ebook)
|
w = self.tool_bar.widgetForAction(self.action_open_ebook)
|
||||||
w.setPopupMode(QToolButton.MenuButtonPopup)
|
w.setPopupMode(QToolButton.MenuButtonPopup)
|
||||||
|
|
||||||
|
for x in ('tool_bar', 'tool_bar2'):
|
||||||
|
x = getattr(self, x)
|
||||||
|
for action in x.actions():
|
||||||
|
# So that the keyboard shortcuts for these actions will
|
||||||
|
# continue to function even when the toolbars are hidden
|
||||||
|
self.addAction(action)
|
||||||
|
|
||||||
self.restore_state()
|
self.restore_state()
|
||||||
|
|
||||||
def set_toc_visible(self, yes):
|
def set_toc_visible(self, yes):
|
||||||
@ -338,9 +381,18 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
def closeEvent(self, e):
|
def closeEvent(self, e):
|
||||||
|
if self.isFullScreen():
|
||||||
|
self.action_full_screen.trigger()
|
||||||
|
e.ignore()
|
||||||
|
return
|
||||||
self.save_state()
|
self.save_state()
|
||||||
return MainWindow.closeEvent(self, e)
|
return MainWindow.closeEvent(self, e)
|
||||||
|
|
||||||
|
def toggle_toolbars(self):
|
||||||
|
for x in ('tool_bar', 'tool_bar2'):
|
||||||
|
x = getattr(self, x)
|
||||||
|
x.setVisible(not x.isVisible())
|
||||||
|
|
||||||
def save_state(self):
|
def save_state(self):
|
||||||
state = bytearray(self.saveState(self.STATE_VERSION))
|
state = bytearray(self.saveState(self.STATE_VERSION))
|
||||||
vprefs['viewer_toolbar_state'] = state
|
vprefs['viewer_toolbar_state'] = state
|
||||||
@ -382,11 +434,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
self._lookup = None
|
self._lookup = None
|
||||||
self.dictionary_view.setHtml(html)
|
self.dictionary_view.setHtml(html)
|
||||||
|
|
||||||
def set_max_width(self):
|
|
||||||
from calibre.gui2.viewer.documentview import config
|
|
||||||
c = config().parse()
|
|
||||||
self.frame.setMaximumWidth(c.max_view_width)
|
|
||||||
|
|
||||||
def get_remember_current_page_opt(self):
|
def get_remember_current_page_opt(self):
|
||||||
from calibre.gui2.viewer.documentview import config
|
from calibre.gui2.viewer.documentview import config
|
||||||
c = config().parse()
|
c = config().parse()
|
||||||
@ -401,6 +448,58 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
else:
|
else:
|
||||||
self.showFullScreen()
|
self.showFullScreen()
|
||||||
|
|
||||||
|
def showFullScreen(self):
|
||||||
|
self.view.document.page_position.save()
|
||||||
|
self.window_mode_changed = 'fullscreen'
|
||||||
|
self.tool_bar.setVisible(False)
|
||||||
|
self.tool_bar2.setVisible(False)
|
||||||
|
self._original_frame_margins = (
|
||||||
|
self.centralwidget.layout().contentsMargins(),
|
||||||
|
self.frame.layout().contentsMargins())
|
||||||
|
self.frame.layout().setContentsMargins(0, 0, 0, 0)
|
||||||
|
self.centralwidget.layout().setContentsMargins(0, 0, 0, 0)
|
||||||
|
|
||||||
|
super(EbookViewer, self).showFullScreen()
|
||||||
|
|
||||||
|
def show_full_screen_label(self):
|
||||||
|
f = self.full_screen_label
|
||||||
|
self.esc_full_screen_action.setEnabled(True)
|
||||||
|
f.setVisible(True)
|
||||||
|
height = 200
|
||||||
|
width = int(0.7*self.view.width())
|
||||||
|
f.resize(width, height)
|
||||||
|
f.move((self.view.width() - width)//2, (self.view.height()-height)//2)
|
||||||
|
a = self.full_screen_label_anim
|
||||||
|
a.setDuration(500)
|
||||||
|
a.setStartValue(QSize(width, 0))
|
||||||
|
a.setEndValue(QSize(width, height))
|
||||||
|
a.start()
|
||||||
|
QTimer.singleShot(2750, self.full_screen_label.hide)
|
||||||
|
self.view.document.switch_to_fullscreen_mode()
|
||||||
|
|
||||||
|
def showNormal(self):
|
||||||
|
self.view.document.page_position.save()
|
||||||
|
self.window_mode_changed = 'normal'
|
||||||
|
self.esc_full_screen_action.setEnabled(False)
|
||||||
|
self.tool_bar.setVisible(True)
|
||||||
|
self.tool_bar2.setVisible(True)
|
||||||
|
self.full_screen_label.setVisible(False)
|
||||||
|
if hasattr(self, '_original_frame_margins'):
|
||||||
|
om = self._original_frame_margins
|
||||||
|
self.centralwidget.layout().setContentsMargins(om[0])
|
||||||
|
self.frame.layout().setContentsMargins(om[1])
|
||||||
|
super(EbookViewer, self).showNormal()
|
||||||
|
|
||||||
|
def handle_window_mode_toggle(self):
|
||||||
|
if self.window_mode_changed:
|
||||||
|
fs = self.window_mode_changed == 'fullscreen'
|
||||||
|
self.window_mode_changed = None
|
||||||
|
if fs:
|
||||||
|
self.show_full_screen_label()
|
||||||
|
else:
|
||||||
|
self.view.document.switch_to_window_mode()
|
||||||
|
self.view.document.page_position.restore()
|
||||||
|
|
||||||
def goto(self, ref):
|
def goto(self, ref):
|
||||||
if ref:
|
if ref:
|
||||||
tokens = ref.split('.')
|
tokens = ref.split('.')
|
||||||
@ -428,6 +527,10 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
def toc_clicked(self, index):
|
def toc_clicked(self, index):
|
||||||
item = self.toc_model.itemFromIndex(index)
|
item = self.toc_model.itemFromIndex(index)
|
||||||
if item.abspath is not None:
|
if item.abspath is not None:
|
||||||
|
if not os.path.exists(item.abspath):
|
||||||
|
return error_dialog(self, _('No such location'),
|
||||||
|
_('The location pointed to by this item'
|
||||||
|
' does not exist.'), show=True)
|
||||||
url = QUrl.fromLocalFile(item.abspath)
|
url = QUrl.fromLocalFile(item.abspath)
|
||||||
if item.fragment:
|
if item.fragment:
|
||||||
url.setFragment(item.fragment)
|
url.setFragment(item.fragment)
|
||||||
@ -595,16 +698,28 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
|||||||
self.open_progress_indicator(_('Laying out %s')%self.current_title)
|
self.open_progress_indicator(_('Laying out %s')%self.current_title)
|
||||||
self.view.load_path(path, pos=pos)
|
self.view.load_path(path, pos=pos)
|
||||||
|
|
||||||
def viewport_resized(self, frac):
|
def viewport_resize_started(self, event):
|
||||||
new_page = self.pos.value()
|
if not self.resize_in_progress:
|
||||||
if self.current_page is not None:
|
# First resize, so save the current page position
|
||||||
try:
|
self.resize_in_progress = True
|
||||||
frac = float(new_page-self.current_page.start_page)/(self.current_page.pages-1)
|
if not self.window_mode_changed:
|
||||||
except ZeroDivisionError:
|
# The special handling for window mode changed will already
|
||||||
frac = 0
|
# have saved page position, so only save it if this is not a
|
||||||
self.view.scroll_to(frac, notify=False)
|
# mode change
|
||||||
|
self.view.document.page_position.save()
|
||||||
|
|
||||||
|
if self.resize_in_progress:
|
||||||
|
self.view_resized_timer.start(75)
|
||||||
|
|
||||||
|
def viewport_resize_finished(self):
|
||||||
|
# There hasn't been a resize event for some time
|
||||||
|
# restore the current page position.
|
||||||
|
self.resize_in_progress = False
|
||||||
|
if self.window_mode_changed:
|
||||||
|
# This resize is part of a window mode change, special case it
|
||||||
|
self.handle_window_mode_toggle()
|
||||||
else:
|
else:
|
||||||
self.set_page_number(frac)
|
self.view.document.page_position.restore()
|
||||||
|
|
||||||
def close_progress_indicator(self):
|
def close_progress_indicator(self):
|
||||||
self.pi.stop()
|
self.pi.stop()
|
||||||
|
@ -57,12 +57,20 @@ class PagePosition(object):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self._cpos = self.current_pos
|
self.save()
|
||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
|
self.restore()
|
||||||
|
|
||||||
|
def save(self):
|
||||||
|
self._cpos = self.current_pos
|
||||||
|
|
||||||
|
def restore(self):
|
||||||
|
if self._cpos is None: return
|
||||||
if isinstance(self._cpos, (int, float)):
|
if isinstance(self._cpos, (int, float)):
|
||||||
self.document.scroll_fraction = self._cpos
|
self.document.scroll_fraction = self._cpos
|
||||||
else:
|
else:
|
||||||
self.scroll_to_cfi(self._cpos)
|
self.scroll_to_cfi(self._cpos)
|
||||||
self._cpos = None
|
self._cpos = None
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ from PyQt4.Qt import (QIcon, QFont, QLabel, QListWidget, QAction,
|
|||||||
QMenu, QStringListModel, QCompleter, QStringList,
|
QMenu, QStringListModel, QCompleter, QStringList,
|
||||||
QTimer, QRect, QFontDatabase, QGraphicsView)
|
QTimer, QRect, QFontDatabase, QGraphicsView)
|
||||||
|
|
||||||
|
from calibre.constants import iswindows
|
||||||
from calibre.gui2 import (NONE, error_dialog, pixmap_to_data, gprefs,
|
from calibre.gui2 import (NONE, error_dialog, pixmap_to_data, gprefs,
|
||||||
warning_dialog)
|
warning_dialog)
|
||||||
from calibre.gui2.filename_pattern_ui import Ui_Form
|
from calibre.gui2.filename_pattern_ui import Ui_Form
|
||||||
@ -365,7 +366,7 @@ class FontFamilyModel(QAbstractListModel): # {{{
|
|||||||
self.families = list(qt_families.intersection(set(self.families)))
|
self.families = list(qt_families.intersection(set(self.families)))
|
||||||
self.families.sort()
|
self.families.sort()
|
||||||
self.families[:0] = [_('None')]
|
self.families[:0] = [_('None')]
|
||||||
self.font = QFont('sansserif')
|
self.font = QFont('Verdana' if iswindows else 'sansserif')
|
||||||
|
|
||||||
def rowCount(self, *args):
|
def rowCount(self, *args):
|
||||||
return len(self.families)
|
return len(self.families)
|
||||||
|
@ -172,11 +172,14 @@ def force_to_bool(val):
|
|||||||
|
|
||||||
class CacheRow(list): # {{{
|
class CacheRow(list): # {{{
|
||||||
|
|
||||||
def __init__(self, db, composites, val):
|
def __init__(self, db, composites, val, series_col, series_sort_col):
|
||||||
self.db = db
|
self.db = db
|
||||||
self._composites = composites
|
self._composites = composites
|
||||||
list.__init__(self, val)
|
list.__init__(self, val)
|
||||||
self._must_do = len(composites) > 0
|
self._must_do = len(composites) > 0
|
||||||
|
self._series_col = series_col
|
||||||
|
self._series_sort_col = series_sort_col
|
||||||
|
self._series_sort = None
|
||||||
|
|
||||||
def __getitem__(self, col):
|
def __getitem__(self, col):
|
||||||
if self._must_do:
|
if self._must_do:
|
||||||
@ -191,12 +194,19 @@ class CacheRow(list): # {{{
|
|||||||
elif col in self._composites:
|
elif col in self._composites:
|
||||||
is_comp = True
|
is_comp = True
|
||||||
if is_comp:
|
if is_comp:
|
||||||
id = list.__getitem__(self, 0)
|
id_ = list.__getitem__(self, 0)
|
||||||
self._must_do = False
|
self._must_do = False
|
||||||
mi = self.db.get_metadata(id, index_is_id=True,
|
mi = self.db.get_metadata(id_, index_is_id=True,
|
||||||
get_user_categories=False)
|
get_user_categories=False)
|
||||||
for c in self._composites:
|
for c in self._composites:
|
||||||
self[c] = mi.get(self._composites[c])
|
self[c] = mi.get(self._composites[c])
|
||||||
|
if col == self._series_sort_col and self._series_sort is None:
|
||||||
|
if self[self._series_col]:
|
||||||
|
self._series_sort = title_sort(self[self._series_col])
|
||||||
|
self[self._series_sort_col] = self._series_sort
|
||||||
|
else:
|
||||||
|
self._series_sort = ''
|
||||||
|
self[self._series_sort_col] = ''
|
||||||
return list.__getitem__(self, col)
|
return list.__getitem__(self, col)
|
||||||
|
|
||||||
def __getslice__(self, i, j):
|
def __getslice__(self, i, j):
|
||||||
@ -226,6 +236,8 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
for key in field_metadata:
|
for key in field_metadata:
|
||||||
if field_metadata[key]['datatype'] == 'composite':
|
if field_metadata[key]['datatype'] == 'composite':
|
||||||
self.composites[field_metadata[key]['rec_index']] = key
|
self.composites[field_metadata[key]['rec_index']] = key
|
||||||
|
self.series_col = field_metadata['series']['rec_index']
|
||||||
|
self.series_sort_col = field_metadata['series_sort']['rec_index']
|
||||||
self._data = []
|
self._data = []
|
||||||
self._map = self._map_filtered = []
|
self._map = self._map_filtered = []
|
||||||
self.first_sort = True
|
self.first_sort = True
|
||||||
@ -918,9 +930,11 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
for id in ids:
|
for id in ids:
|
||||||
try:
|
try:
|
||||||
self._data[id] = CacheRow(db, self.composites,
|
self._data[id] = CacheRow(db, self.composites,
|
||||||
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
|
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0],
|
||||||
|
self.series_col, self.series_sort_col)
|
||||||
self._data[id].append(db.book_on_device_string(id))
|
self._data[id].append(db.book_on_device_string(id))
|
||||||
self._data[id].append(self.marked_ids_dict.get(id, None))
|
self._data[id].append(self.marked_ids_dict.get(id, None))
|
||||||
|
self._data[id].append(None)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
@ -935,9 +949,11 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
self._data.extend(repeat(None, max(ids)-len(self._data)+2))
|
self._data.extend(repeat(None, max(ids)-len(self._data)+2))
|
||||||
for id in ids:
|
for id in ids:
|
||||||
self._data[id] = CacheRow(db, self.composites,
|
self._data[id] = CacheRow(db, self.composites,
|
||||||
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0])
|
db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0],
|
||||||
|
self.series_col, self.series_sort_col)
|
||||||
self._data[id].append(db.book_on_device_string(id))
|
self._data[id].append(db.book_on_device_string(id))
|
||||||
self._data[id].append(self.marked_ids_dict.get(id, None))
|
self._data[id].append(self.marked_ids_dict.get(id, None))
|
||||||
|
self._data[id].append(None) # Series sort column
|
||||||
self._map[0:0] = ids
|
self._map[0:0] = ids
|
||||||
self._map_filtered[0:0] = ids
|
self._map_filtered[0:0] = ids
|
||||||
|
|
||||||
@ -962,11 +978,13 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
temp = db.conn.get('SELECT * FROM meta2')
|
temp = db.conn.get('SELECT * FROM meta2')
|
||||||
self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
|
self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
|
||||||
for r in temp:
|
for r in temp:
|
||||||
self._data[r[0]] = CacheRow(db, self.composites, r)
|
self._data[r[0]] = CacheRow(db, self.composites, r,
|
||||||
|
self.series_col, self.series_sort_col)
|
||||||
for item in self._data:
|
for item in self._data:
|
||||||
if item is not None:
|
if item is not None:
|
||||||
item.append(db.book_on_device_string(item[0]))
|
item.append(db.book_on_device_string(item[0]))
|
||||||
item.append(None)
|
# Temp mark and series_sort columns
|
||||||
|
item.extend((None, None))
|
||||||
|
|
||||||
marked_col = self.FIELD_MAP['marked']
|
marked_col = self.FIELD_MAP['marked']
|
||||||
for id_,val in self.marked_ids_dict.iteritems():
|
for id_,val in self.marked_ids_dict.iteritems():
|
||||||
|
@ -434,6 +434,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
|
self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
|
||||||
self.FIELD_MAP['marked'] = base = base+1
|
self.FIELD_MAP['marked'] = base = base+1
|
||||||
self.field_metadata.set_field_record_index('marked', base, prefer_custom=False)
|
self.field_metadata.set_field_record_index('marked', base, prefer_custom=False)
|
||||||
|
self.FIELD_MAP['series_sort'] = base = base+1
|
||||||
|
self.field_metadata.set_field_record_index('series_sort', base, prefer_custom=False)
|
||||||
|
|
||||||
script = '''
|
script = '''
|
||||||
DROP VIEW IF EXISTS meta2;
|
DROP VIEW IF EXISTS meta2;
|
||||||
|
@ -327,6 +327,16 @@ class FieldMetadata(dict):
|
|||||||
'is_custom':False,
|
'is_custom':False,
|
||||||
'is_category':False,
|
'is_category':False,
|
||||||
'is_csp': False}),
|
'is_csp': False}),
|
||||||
|
('series_sort', {'table':None,
|
||||||
|
'column':None,
|
||||||
|
'datatype':'text',
|
||||||
|
'is_multiple':{},
|
||||||
|
'kind':'field',
|
||||||
|
'name':_('Series Sort'),
|
||||||
|
'search_terms':['series_sort'],
|
||||||
|
'is_custom':False,
|
||||||
|
'is_category':False,
|
||||||
|
'is_csp': False}),
|
||||||
('sort', {'table':None,
|
('sort', {'table':None,
|
||||||
'column':None,
|
'column':None,
|
||||||
'datatype':'text',
|
'datatype':'text',
|
||||||
|
@ -298,6 +298,7 @@ The following functions are available in addition to those described in single-f
|
|||||||
* ``or(value, value, ...)`` -- returns the string "1" if any value is not empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
|
* ``or(value, value, ...)`` -- returns the string "1" if any value is not empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
|
||||||
* ``print(a, b, ...)`` -- prints the arguments to standard output. Unless you start calibre from the command line (``calibre-debug -g``), the output will go to a black hole.
|
* ``print(a, b, ...)`` -- prints the arguments to standard output. Unless you start calibre from the command line (``calibre-debug -g``), the output will go to a black hole.
|
||||||
* ``raw_field(name)`` -- returns the metadata field named by name without applying any formatting.
|
* ``raw_field(name)`` -- returns the metadata field named by name without applying any formatting.
|
||||||
|
* ``series_sort()`` -- returns the series sort value.
|
||||||
* ``strcat(a, b, ...)`` -- can take any number of arguments. Returns a string formed by concatenating all the arguments.
|
* ``strcat(a, b, ...)`` -- can take any number of arguments. Returns a string formed by concatenating all the arguments.
|
||||||
* ``strcat_max(max, string1, prefix2, string2, ...)`` -- Returns a string formed by concatenating the arguments. The returned value is initialized to string1. `Prefix, string` pairs are added to the end of the value as long as the resulting string length is less than `max`. String1 is returned even if string1 is longer than max. You can pass as many `prefix, string` pairs as you wish.
|
* ``strcat_max(max, string1, prefix2, string2, ...)`` -- Returns a string formed by concatenating the arguments. The returned value is initialized to string1. `Prefix, string` pairs are added to the end of the value as long as the resulting string length is less than `max`. String1 is returned even if string1 is longer than max. You can pass as many `prefix, string` pairs as you wish.
|
||||||
* ``strcmp(x, y, lt, eq, gt)`` -- does a case-insensitive comparison x and y as strings. Returns ``lt`` if x < y. Returns ``eq`` if x == y. Otherwise returns ``gt``.
|
* ``strcmp(x, y, lt, eq, gt)`` -- does a case-insensitive comparison x and y as strings. Returns ``lt`` if x < y. Returns ``eq`` if x == y. Otherwise returns ``gt``.
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user