Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 02:34:06 -04:00)

Commit fd288645d0: sync to trunk.

recipes/defensenews.recipe (new file, 62 lines)
@@ -0,0 +1,62 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.defensenews.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DefenseNews(BasicNewsRecipe):
+    title                 = 'Defense News'
+    __author__            = 'Darko Miletic'
+    description           = 'Find late-breaking defense news from the leading defense news weekly'
+    publisher             = 'Gannett Government Media Corporation'
+    category              = 'defense news, defence news, defense, defence, defence budget, defence policy'
+    oldest_article        = 31
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.defensenews.com/images/logo_defensenews2.jpg'
+    extra_css             = """
+        body{font-family: Arial,Helvetica,sans-serif }
+        img{margin-bottom: 0.4em; display:block}
+        .info{font-size: small; color: gray}
+    """
+
+    conversion_options = {
+          'comment'   : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }
+
+    remove_tags = [
+         dict(name=['meta','link'])
+        ,dict(attrs={'class':['toolbar','related','left','right']})
+    ]
+    remove_tags_before = dict(attrs={'class':'storyWrp'})
+    remove_tags_after  = dict(attrs={'class':'middle'})
+
+    remove_attributes = ['lang']
+
+    feeds = [
+          (u'Europe'              , u'http://www.defensenews.com/rss/eur/')
+        , (u'Americas'            , u'http://www.defensenews.com/rss/ame/')
+        , (u'Asia & Pacific rim'  , u'http://www.defensenews.com/rss/asi/')
+        , (u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/')
+        , (u'Air'                 , u'http://www.defensenews.com/rss/air/')
+        , (u'Land'                , u'http://www.defensenews.com/rss/lan/')
+        , (u'Naval'               , u'http://www.defensenews.com/rss/sea/')
+    ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
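All the recipe changes in this commit follow calibre's BasicNewsRecipe pattern; a minimal sketch for readers unfamiliar with it (the feed URL is a placeholder, not a real endpoint):

    # Minimal sketch of the BasicNewsRecipe pattern used throughout this
    # commit: class attributes configure the fetcher, and hook methods such
    # as preprocess_html() let a recipe clean up each downloaded page.
    from calibre.web.feeds.news import BasicNewsRecipe

    class Example(BasicNewsRecipe):
        title          = 'Example'
        language       = 'en'
        oldest_article = 7      # days back to fetch
        no_stylesheets = True
        feeds = [('Section', 'http://example.com/rss')]  # placeholder URL

        def preprocess_html(self, soup):
            # called once per article page, before conversion
            return soup

A recipe file like this can be test-built from the command line with something like `ebook-convert example.recipe .epub --test`.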
@@ -2,6 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 http://www.dilbert.com
+DrMerry added cover Image 2011-11-12
 '''
 
 from calibre.web.feeds.recipes import BasicNewsRecipe
@@ -9,7 +10,7 @@ import re
 
 class DilbertBig(BasicNewsRecipe):
     title = 'Dilbert'
-    __author__ = 'Darko Miletic and Starson17'
+    __author__ = 'Darko Miletic and Starson17 contribution of DrMerry'
     description = 'Dilbert'
     reverse_article_order = True
     oldest_article = 15
@@ -20,6 +21,7 @@ class DilbertBig(BasicNewsRecipe):
     publisher = 'UNITED FEATURE SYNDICATE, INC.'
     category = 'comic'
     language = 'en'
+    cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png'
 
     conversion_options = {
         'comments' : description
@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
              ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
             dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
             dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -56,6 +54,14 @@ class Economist(BasicNewsRecipe):
         return br
     '''
 
+    def get_cover_url(self):
+        br = self.browser
+        br.open(self.INDEX)
+        issue = br.geturl().split('/')[4]
+        self.log('Fetching cover for issue: %s'%issue)
+        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
+        return cover_url
+
     def parse_index(self):
         return self.economist_parse_index()
(the same change, applied to the second Economist recipe variant:)

@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
              ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
             dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
             dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -40,6 +38,14 @@ class Economist(BasicNewsRecipe):
     # downloaded with connection reset by peer (104) errors.
     delay = 1
 
+    def get_cover_url(self):
+        br = self.browser
+        br.open(self.INDEX)
+        issue = br.geturl().split('/')[4]
+        self.log('Fetching cover for issue: %s'%issue)
+        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
+        return cover_url
+
     def parse_index(self):
         try:
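To see why the new get_cover_url() works, here is an illustrative trace (the URL below is hypothetical): economist.com redirects the index to an issue-dated URL, and the date segment names the cover image.

    # Not part of the diff: a worked example of the issue/cover derivation.
    url = 'http://www.economist.com/printedition/2011-10-01'  # hypothetical redirect target
    issue = url.split('/')[4]          # -> '2011-10-01'
    name = issue.translate(None, '-')  # -> '20111001' (Python 2 str.translate deletes '-')
    # cover image: http://media.economist.com/.../print-covers/20111001_CNA400.jpg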
@@ -19,45 +19,20 @@ class FazNet(BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf-8'
     remove_javascript = True
+    keep_only_tags = [{'class':'FAZArtikelEinleitung'},
+                      {'id':'ArtikelTabContent_0'}]
 
-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
-
-    remove_tags = [
-        dict(name=['object','link','embed','base'])
-        ,dict(name='div',
-              attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo',
-                              'ArtikelServices', 'ModulLesermeinungenFooter',
-                              'ModulArtikelServices', 'BoxTool Aufklappen_Grau',
-                              'SocialMediaUnten', ]}),
-        dict(id=['KurzLinkMenu', 'ArtikelServicesMenu']),
-    ]
 
     feeds = [
-        ('FAZ.NET Aktuell', 'http://www.faz.net/s/RubF3CE08B362D244869BE7984590CB6AC1/Tpl~Epartner~SRss_.xml'),
-        ('Politik', 'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
-        ('Wirtschaft', 'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
-        ('Feuilleton', 'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
-        ('Sport', 'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
-        ('Gesellschaft', 'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
-        ('Finanzen', 'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
-        ('Wissen', 'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
-        ('Reise', 'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
-        ('Technik & Motor', 'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
-        ('Beruf & Chance', 'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml')
+        ('FAZ.NET Aktuell', 'http://www.faz.net/aktuell/?rssview=1'),
+        ('Politik', 'http://www.faz.net/aktuell/politik/?rssview=1'),
+        ('Wirtschaft', 'http://www.faz.net/aktuell/wirtschaft/?rssview=1'),
+        ('Feuilleton', 'http://www.faz.net/aktuell/feuilleton/?rssview=1'),
+        ('Sport', 'http://www.faz.net/aktuell/sport/?rssview=1'),
+        ('Gesellschaft', 'http://www.faz.net/aktuell/gesellschaft/?rssview=1'),
+        ('Finanzen', 'http://www.faz.net/aktuell/finanzen/?rssview=1'),
+        ('Technik & Motor', 'http://www.faz.net/aktuell/technik-motor/?rssview=1'),
+        ('Wissen', 'http://www.faz.net/aktuell/wissen/?rssview=1'),
+        ('Reise', 'http://www.faz.net/aktuell/reise/?rssview=1'),
+        ('Beruf & Chance', 'http://www.faz.net/aktuell/beruf-chance/?rssview=1')
     ]
 
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
-        soup.head.insert(0,mtag)
-        del soup.body['onload']
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
@@ -1,35 +1,71 @@
-#!/usr/bin/python
-
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
+from calibre.utils.magick import Image
 
 class AdvancedUserRecipe1307556816(BasicNewsRecipe):
     title = u'Geek and Poke'
     __author__ = u'DrMerry'
     description = u'Geek and Poke Cartoons'
+    publisher = u'Oliver Widder'
+    author = u'Oliver Widder, DrMerry (calibre-code), calibre'
     oldest_article = 31
     max_articles_per_feed = 100
     language = u'en'
     simultaneous_downloads = 5
     #delay = 1
-    timefmt = ' [%A, %d %B, %Y]'
+    timefmt = ' [%a, %d %B, %Y]'
     summary_length = -1
     no_stylesheets = True
+    category = 'News.IT, Cartoon, Humor, Geek'
+    use_embedded_content = False
     cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
     remove_javascript = True
     remove_empty_feeds = True
     publication_type = 'blog'
+    conversion_options = {
+         'comments'  : ''
+        ,'tags'      : category
+        ,'language'  : language
+        ,'publisher' : publisher
+        ,'author'    : author
+    }
 
-    preprocess_regexps = [ (re.compile(r'(<p>&nbsp;</p>|<iframe.*</iframe>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
-                           (re.compile(r'(&nbsp;| )', re.DOTALL|re.IGNORECASE),lambda match: ' '),
-                           (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>')
+    remove_tags_before = dict(name='p', attrs={'class':'content-nav'})
+    remove_tags_after  = dict(name='div', attrs={'class':'entry-content'})
+    remove_tags = [dict(name='div', attrs={'class':'entry-footer'}),
+                   dict(name='div', attrs={'id':'alpha'}),
+                   dict(name='div', attrs={'id':'gamma'}),
+                   dict(name='iframe'),
+                   dict(name='p', attrs={'class':'content-nav'})]
+
+    filter_regexps = [(r'feedburner\.com'),
+                      (r'pixel.quantserve\.com'),
+                      (r'googlesyndication\.com'),
+                      (r'yimg\.com'),
+                      (r'scorecardresearch\.com')]
+
+    preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
+                          (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
+                          (re.compile(r'<h2[^>]*>([^<]*)</h2>[^>]*(<div[^>]*>)', re.DOTALL|re.IGNORECASE), lambda match: match.group(2) + '<div id="MERRYdate">' + match.group(1) + '</div>'),
+                          (re.compile(r'(<h3[^>]*>)<a[^>]>((?!</a)*)</a></h3>', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + match.group(2) + '</h3>'),
+                          (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + '<br><cite>' + match.group(2) + '</cite>'),
+                          (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>'),
+                          (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')
     ]
 
-    extra_css = 'body, h3, p, h2, h1, div, span{margin:0px} h2.date-header {font-size: 0.7em; color:#eee;} h3.entry-header{font-size: 1.0em} div.entry-body{font-size: 0.9em}'
+    extra_css = 'body, h3, p, #MERRYdate, h1, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em} #MERRYdate {font-size: 0.5em}'
 
-    remove_tags_before = dict(name='h2', attrs={'class':'date-header'})
-    remove_tags_after = dict(name='div', attrs={'class':'entry-body'})
-    feeds = [(u'Geek and Poke', u'http://feeds.feedburner.com/GeekAndPoke?format=xml')]
+    def postprocess_html(self, soup, first):
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            width, height = img.size
+            #print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
+            img.trim(0)
+            img.save(iurl)
+            width, height = img.size
+            #print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
+        return soup
+
+    feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']
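The MERRYdate regexp added above is the least obvious of the batch; on sample markup (invented here) it moves the date header inside the following div so the new extra_css can shrink it:

    # Illustration only; the input markup is made up.
    import re
    html = '<h2 class="date-header">Oct 04, 2011</h2>\n<div class="entry">'
    pat = re.compile(r'<h2[^>]*>([^<]*)</h2>[^>]*(<div[^>]*>)', re.DOTALL | re.IGNORECASE)
    print(pat.sub(lambda m: m.group(2) + '<div id="MERRYdate">' + m.group(1) + '</div>', html))
    # -> <div class="entry"><div id="MERRYdate">Oct 04, 2011</div>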
@@ -119,10 +119,8 @@ class Guardian(BasicNewsRecipe):
         }
 
     def parse_index(self):
-        try:
-            feeds = []
-            for title, href in self.find_sections():
-                feeds.append((title, list(self.find_articles(href))))
-            return feeds
-        except:
-            raise NotImplementedError
+        feeds = []
+        for title, href in self.find_sections():
+            feeds.append((title, list(self.find_articles(href))))
+        return feeds
@@ -1,7 +1,9 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 
 class AdvancedUserRecipe(BasicNewsRecipe):
 
-    title = 'Heise-online'
+    title = 'heise online'
     description = 'News vom Heise-Verlag'
     __author__ = 'schuster'
     use_embedded_content = False
@@ -12,10 +14,11 @@ class AdvancedUserRecipe(BasicNewsRecipe):
     remove_empty_feeds = True
     timeout = 5
     no_stylesheets = True
+    encoding = 'utf-8'
 
     remove_tags_after = dict(name ='p', attrs={'class':'editor'})
-    remove_tags = [dict(id='navi_top_container'),
+    remove_tags = [{'class':'navi_top_container'},
                    dict(id='navi_bottom'),
                    dict(id='mitte_rechts'),
                    dict(id='navigation'),
@@ -25,28 +28,28 @@ class AdvancedUserRecipe(BasicNewsRecipe):
                    dict(id='content_foren'),
                    dict(id='seiten_navi'),
                    dict(id='adbottom'),
-                   dict(id='sitemap')]
+                   dict(id='sitemap'),
+                   dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
+                   ]
 
     feeds = [
         ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
-        ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
-        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
-        ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
-        ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
-        ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
-        ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
-        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
-        ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
-        ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
         ('iX', 'http://www.heise.de/ix/news/news.rdf'),
-        ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
+        ('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
+        ('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
+        ('Security', 'http://www.heise.de/security/news/news-atom.xml'),
+        ('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
+        ('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
+        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
+        ('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
+        ('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
         ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
         ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
         ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
-        ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
+        ('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
         ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
     ]
 
     def print_version(self, url):
         return url + '?view=print'
@@ -18,6 +18,7 @@ class HoustonChronicle(BasicNewsRecipe):
 
     keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
                       'hst-articletext' in x or 'hst-galleryitem' in x)}
+    remove_attributes = ['xmlns']
 
     feeds = [
         ('News', "http://www.chron.com/rss/feed/News-270.php"),
recipes/merco_press.recipe (new file, 27 lines)
@@ -0,0 +1,27 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MercoPress(BasicNewsRecipe):
+    title = u'Merco Press'
+    description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America."
+    cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif'
+
+    __author__ = 'Russell Phillips'
+    language = 'en'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
+    remove_tags = [dict(name='a')]
+
+    feeds = [('Antarctica', 'http://en.mercopress.com/rss/antarctica'),
+             ('Argentina', 'http://en.mercopress.com/rss/argentina'),
+             ('Brazil', 'http://en.mercopress.com/rss/brazil'),
+             ('Falkland Islands', 'http://en.mercopress.com/rss/falkland-islands'),
+             ('International News', 'http://en.mercopress.com/rss/international'),
+             ('Latin America', 'http://en.mercopress.com/rss/latin-america'),
+             ('Mercosur', 'http://en.mercopress.com/rss/mercosur'),
+             ('Paraguay', 'http://en.mercopress.com/rss/paraguay'),
+             ('United States', 'http://en.mercopress.com/rss/united-states'),
+             ('Uruguay', 'http://en.mercopress.com/rss/uruguay')]
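Unlike the older recipes in this commit, the new Russell Phillips recipes do no hand-written tag surgery: auto_cleanup = True hands article extraction to calibre's readability-style heuristics, so a recipe can reduce to metadata plus feeds. A sketch of the trade-off (placeholder names and URL):

    # With auto_cleanup, the per-site keep_only_tags / remove_tags /
    # preprocess_html plumbing can usually be dropped entirely.
    class Minimal(BasicNewsRecipe):     # assumes the usual import
        title        = 'Example'
        language     = 'en'
        auto_cleanup = True             # heuristic article extraction
        feeds = [('Feed', 'http://example.com/rss')]  # placeholder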
@@ -5,30 +5,46 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     description = 'News as provide by The Metro -UK'
 
     __author__ = 'Dave Asbury'
+    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
 
     no_stylesheets = True
     oldest_article = 1
-    max_articles_per_feed = 25
+    max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True
 
-    preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    #preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    preprocess_regexps = [
+        (re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
+    preprocess_regexps = [
+        (re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]
 
     language = 'en_GB'
 
     masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
 
-    extra_css = 'h2 {font: sans-serif medium;}'
     keep_only_tags = [
         dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
         dict(attrs={'class':['img-cnt figure']}),
         dict(attrs={'class':['art-img']}),
-        dict(name='div', attrs={'class':'art-lft'})
+        dict(name='div', attrs={'class':'art-lft'}),
+        dict(name='p')
     ]
     remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
                    'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
                    dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
+                   ,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
     ]
     feeds = [
         (u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
+
+    extra_css = '''
+        body {font: sans-serif medium;}'
+        h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+        h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
+        span{ font-size:9.5px; font-weight:bold;font-style:italic}
+        p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+    '''
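One caveat worth noting on the Metro hunk: preprocess_regexps is assigned twice in the new version, and class attributes do not accumulate, so only the second list (the 'tweet' stripper) survives; the img-cap rule is dead code. A minimal demonstration of why:

    # Python keeps only the last binding of a class attribute:
    class C:
        x = ['img-cap rule']
        x = ['tweet rule']
    print(C.x)   # ['tweet rule']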
@@ -6,19 +6,24 @@ __Region__ = 'Hong Kong'
 # Users of Kindle 3 with limited system-level CJK support
 # please replace the following "True" with "False".
 __MakePeriodical__ = True
-# Turn below to true if your device supports display of CJK titles
+# Turn below to True if your device supports display of CJK titles
 __UseChineseTitle__ = False
 # Set it to False if you want to skip images
 __KeepImages__ = True
-# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
+# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
 __UseLife__ = True
-# (HK only) if __UseLife__ is true, turn this on if you want to include the column section
+# (HK only) It is to disable the column section which is now a premium content
 __InclCols__ = False
+# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
+__ParsePFF__ = False
+# (HK only) Turn below to True if you wish hi-res images
+__HiResImg__ = False
 
 
 '''
 Change Log:
-2011/09/21: fetching "column" section is made optional. Default is False
+2011/10/04: option to get hi-res photos for the articles
+2011/09/21: fetching "column" section is made optional.
 2011/09/18: parse "column" section stuff from source text file directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@@ -42,7 +47,7 @@ Change Log:
 2010/10/31: skip repeated articles in section pages
 '''
 
-import os, datetime, re
+import os, datetime, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
@@ -56,7 +61,7 @@ class MPRecipe(BasicNewsRecipe):
     title = 'Ming Pao - Hong Kong'
     description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
     category = 'Chinese, News, Hong Kong'
-    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
                       dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
@@ -147,43 +152,6 @@ class MPRecipe(BasicNewsRecipe):
     conversion_options = {'linearize_tables':True}
     timefmt = ''
 
-    def image_url_processor(cls, baseurl, url):
-        # trick: break the url at the first occurance of digit, add an additional
-        # '_' at the front
-        # not working, may need to move this to preprocess_html() method
-        # minIdx = 10000
-        # i0 = url.find('0')
-        # if i0 >= 0 and i0 < minIdx:
-        #     minIdx = i0
-        # i1 = url.find('1')
-        # if i1 >= 0 and i1 < minIdx:
-        #     minIdx = i1
-        # i2 = url.find('2')
-        # if i2 >= 0 and i2 < minIdx:
-        #     minIdx = i2
-        # i3 = url.find('3')
-        # if i3 >= 0 and i0 < minIdx:
-        #     minIdx = i3
-        # i4 = url.find('4')
-        # if i4 >= 0 and i4 < minIdx:
-        #     minIdx = i4
-        # i5 = url.find('5')
-        # if i5 >= 0 and i5 < minIdx:
-        #     minIdx = i5
-        # i6 = url.find('6')
-        # if i6 >= 0 and i6 < minIdx:
-        #     minIdx = i6
-        # i7 = url.find('7')
-        # if i7 >= 0 and i7 < minIdx:
-        #     minIdx = i7
-        # i8 = url.find('8')
-        # if i8 >= 0 and i8 < minIdx:
-        #     minIdx = i8
-        # i9 = url.find('9')
-        # if i9 >= 0 and i9 < minIdx:
-        #     minIdx = i9
-        return url
-
     def get_dtlocal(self):
         dt_utc = datetime.datetime.utcnow()
         if __Region__ == 'Hong Kong':
@@ -260,15 +228,16 @@ class MPRecipe(BasicNewsRecipe):
         else:
             for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                                (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                 articles = self.parse_section(url)
                 if articles:
                     feeds.append((title, articles))
 
             # special- editorial
-            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-            if ed_articles:
-                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+            #if ed_articles:
+            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
 
             for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -279,20 +248,39 @@ class MPRecipe(BasicNewsRecipe):
 
             # special - finance
             #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-            if fin_articles:
-                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+            #if fin_articles:
+            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
 
-            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-                articles = self.parse_section(url)
+            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
+                articles = self.parse_section2(url, keystr)
                 if articles:
                     feeds.append((title, articles))
 
+            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+            #    articles = self.parse_section(url)
+            #    if articles:
+            #        feeds.append((title, articles))
+
             # special - entertainment
-            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-            if ent_articles:
-                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+            #if ent_articles:
+            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+
+            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
+                                       ]:
+                articles = self.parse_section2(url, keystr)
+                if articles:
+                    feeds.append((title, articles))
+
+            if __InclCols__ == True:
+                # parse column section articles directly from .txt files
+                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                           ]:
+                    articles = self.parse_section2_txt(url, keystr)
+                    if articles:
+                        feeds.append((title, articles))
+
             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -300,11 +288,6 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))
 
-            # special- columns
-            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-            if col_articles:
-                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
-
         elif __Region__ == 'Vancouver':
             for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                                (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -348,6 +331,16 @@ class MPRecipe(BasicNewsRecipe):
                 title = self.tag_to_string(a)
                 url = a.get('href', False)
                 url = 'http://news.mingpao.com/' + dateStr + '/' +url
+                # replace the url to the print-friendly version
+                if __ParsePFF__ == True:
+                    if url.rfind('Redirect') <> -1:
+                        url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
+                        url = re.sub('%2F.*%2F', '/', url)
+                        title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
+                        url = url.replace('%2Etxt', '_print.htm')
+                        url = url.replace('%5F', '_')
+                    else:
+                        url = url.replace('.htm', '_print.htm')
                 if url not in included_urls and url.rfind('Redirect') == -1:
                     current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                     included_urls.append(url)
@@ -472,11 +465,92 @@ class MPRecipe(BasicNewsRecipe):
         current_articles.reverse()
         return current_articles
 
-    # preprocess those .txt based files
+    # preprocess those .txt and javascript based files
     def preprocess_raw_html(self, raw_html, url):
-        if url.rfind('ftp') == -1:
+        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
+        if __HiResImg__ == True:
+            # TODO: add a _ in front of an image url
+            if url.rfind('news.mingpao.com') > -1:
+                imglist = re.findall('src="?.*?jpg"', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                for img in imglist:
+                    gifimg = img.replace('jpg"', 'gif"')
+                    try:
+                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        # find the location of the first _
+                        pos = img.find('_')
+                        if pos > -1:
+                            # if found, insert _ after the first _
+                            newimg = img[0:pos] + '_' + img[pos:]
+                            raw_html = raw_html.replace(img, newimg)
+                        else:
+                            # if not found, insert _ after "
+                            raw_html = raw_html.replace(img[1:], '"_' + img[1:])
+            elif url.rfind('life.mingpao.com') > -1:
+                imglist = re.findall('src=\'?.*?jpg\'', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                #print 'Img list: ', imglist, '\n'
+                for img in imglist:
+                    gifimg = img.replace('jpg\'', 'gif\'')
+                    try:
+                        #print 'Original: ', url
+                        #print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
+                        gifurl = re.sub(r'dailynews.*txt', '', url)
+                        #print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
+                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                        #print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
+                        #br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        #print 'GIF not found'
+                        pos = img.rfind('/')
+                        newimg = img[0:pos+1] + '_' + img[pos+1:]
+                        #print 'newimg: ', newimg
+                        raw_html = raw_html.replace(img, newimg)
+        if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
             return raw_html
         else:
+            if url.rfind('_print.htm') <> -1:
+                # javascript based file
+                splitter = re.compile(r'\n')
+                new_raw_html = '<html><head><title>Untitled</title></head>'
+                new_raw_html = new_raw_html + '<body>'
+                for item in splitter.split(raw_html):
+                    if item.startswith('var heading1 ='):
+                        heading = item.replace('var heading1 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="heading">' + heading
+                    if item.startswith('var heading2 ='):
+                        heading = item.replace('var heading2 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        if heading <> '':
+                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
+                        else:
+                            new_raw_html = new_raw_html + '</div>'
+                    if item.startswith('var content ='):
+                        content = item.replace("var content = ", '')
+                        content = content.replace('\'', '')
+                        content = content.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
+                    if item.startswith('var photocontent ='):
+                        photo = item.replace('var photocontent = \'', '')
+                        photo = photo.replace('\'', '')
+                        photo = photo.replace(';', '')
+                        photo = photo.replace('<tr>', '')
+                        photo = photo.replace('<td>', '')
+                        photo = photo.replace('</tr>', '')
+                        photo = photo.replace('</td>', '<br>')
+                        photo = photo.replace('class="photo"', '')
+                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
+                return new_raw_html + '</body></html>'
+            else:
+                # .txt based file
                 splitter = re.compile(r'\n') # Match non-digits
                 new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
                 next_is_img_txt = False
@@ -604,7 +678,7 @@ class MPRecipe(BasicNewsRecipe):
             if po is None:
                 self.play_order_counter += 1
                 po = self.play_order_counter
-            parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+            parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
                             play_order=po, author=auth, description=desc)
             last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
             for sp in a.sub_pages:
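The new _print.htm branch of preprocess_raw_html() handles printer-friendly Ming Pao pages that carry their text in javascript variables; a small runnable illustration (invented input) of how one such variable is unwrapped by the loop above:

    # Invented sample line from a _print.htm page:
    item = "var heading1 = 'Headline';"
    heading = item.replace("var heading1 = '", '').replace("'", '').replace(';', '')
    print('<div class="heading">' + heading)   # -> <div class="heading">Headline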
recipes/penguin_news.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MercoPress(BasicNewsRecipe):
+    title = u'Penguin News'
+    description = u"Penguin News: the Falkland Islands' only newspaper."
+    cover_url = 'http://www.penguin-news.com/templates/rt_syndicate_j15/images/logo/light/logo1.png'
+    language = 'en'
+
+    __author__ = 'Russell Phillips'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
+
+    feeds = [(u'Penguin News - Falkland Islands', u'http://www.penguin-news.com/index.php?format=feed&type=rss')]
recipes/revista_piaui.recipe (new file, 29 lines)
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RevistaPiaui(BasicNewsRecipe):
+    title = u'Revista piau\xed'
+    language = 'pt_BR'
+    __author__ = u'Eduardo Gustini Simões'
+    oldest_article = 31
+    max_articles_per_feed = 50
+    auto_cleanup = True
+
+    feeds = [(u'Edi\xe7\xe3o Atual', u'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                soup = self.index_to_soup('http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')
+                itemTitle = article.title.partition('|')[0].rstrip()
+                item = soup.find(text=itemTitle)
+                articleDescription = item.parent.parent.description.string.partition('<br />')[2]
+                article.summary = articleDescription
+        return feeds
+
+    def populate_article_metadata(self, article, soup, first):
+        h2 = soup.find('h2')
+        h2.string.replaceWith(h2.string.partition('|')[0].rstrip())
+        h2.replaceWith(h2.prettify() + '<p><em>' + article.summary + '</em></p><p><em>' + ' posted at ' + article.localtime.strftime('%d-%m-%Y') + '</em></p>')
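The parse_feeds() override above re-fetches the issue RSS to recover each article's description; the title normalisation it relies on is plain str.partition (hypothetical feed title below):

    # Illustration of the title handling in parse_feeds():
    title = u'Questao de ordem | piaui_61'   # hypothetical feed title
    print(title.partition('|')[0].rstrip())  # -> Questao de ordem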
@@ -9,285 +9,79 @@ calibre recipe for slate.com
 
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Comment, Tag
 
 class Slate(BasicNewsRecipe):
-    # Method variables for customizing downloads
     description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
-    __author__ = 'GRiker, Sujata Raman and Nick Redding'
-    max_articles_per_feed = 100
-    oldest_article = 14
-    recursions = 0
-    delay = 0
-    simultaneous_downloads = 5
-    timeout = 120.0
+    __author__ = 'Kovid Goyal'
     timefmt = ''
-    feeds = None
     no_stylesheets = True
-    encoding = None
     language = 'en'
 
+    title = 'Slate'
+    INDEX = 'http://slate.com'
+    encoding = 'utf-8'
+    preprocess_regexps = [
+        (re.compile(r'<!--.*?-->', re.DOTALL), lambda x: ''),
+        (re.compile(r'^.*?<html', re.DOTALL), lambda x:'<html'),
+        (re.compile(r'<meta[^>]+?/>', re.DOTALL), lambda x:''),
+    ]
+    remove_tags = [
+        {'name':['link', 'script']},
+        {'class':['share-box-flank', 'sl-crumbs', 'sl-tbar',
+                  'sl-chunky-tbar']},
+    ]
+    remove_tags_after = [{'class':'sl-art-creds-cntr'}]
+    keep_only_tags = {'class':'sl-body-wrapper'}
+    remove_attributes = ['style']
-    slate_complete = True
-    if slate_complete:
-        title = 'Slate (complete)'
-    else:
-        title = 'Slate (weekly)'
-
-    # Method variables for customizing feed parsing
-    summary_length = 250
-    use_embedded_content = None
-
-    # Method variables for pre/post processing of HTML
-    preprocess_regexps = [ (re.compile(r'<p><em>Disclosure: <strong>Slate</strong> is owned by the Washington Post.*</p>',
-                                       re.DOTALL|re.IGNORECASE),
-                           lambda match: ''),
-                           (re.compile(r'<p><strong><em>Join the discussion about this story on.*</p>',
-                                       re.DOTALL|re.IGNORECASE),
-                           lambda match: '') ]
-
-    match_regexps = []
-
-    # The second entry is for 'Big Money', which comes from a different site, uses different markup
-    keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body']}),
-                      dict(attrs={ 'id':['content']}) ]
-
-    # The second entry is for 'Big Money', which comes from a different site, uses different markup
-    remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper',
-                                      'article_bottom_tools_cntr','fray_article_discussion','fray_article_links','bottom_sponsored_links','author_bio',
-                                      'bizbox_links_bottom','ris_links_wrapper','BOXXLE',
-                                      'comments_button','add_comments_button','comments-to-fray','marriott_ad',
-                                      'article_bottom_tools','recommend_tab2','fbog_article_bottom_cntr']}),
-                   dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ]
-
-    excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast']
-    excludedTitleKeywords = ['Gabfest','Slate V','on Twitter']
-    excludedAuthorKeywords = []
-    excludedContentKeywords = ['http://twitter.com/Slate']
-
-    extra_css = '''
-        .h1_subhead{font-family:Arial; font-size:small; }
-        h1{font-family:Verdana; font-size:large; }
-        .byline {font-family:Georgia; margin-bottom: 0px; }
-        .dateline {font-family:Arial; font-size: smaller; height: 0pt;}
-        .imagewrapper {font-family:Verdana;font-size:x-small; }
-        .source {font-family:Verdana; font-size:x-small;}
-        .credit {font-family:Verdana; font-size: smaller;}
-        #article_body {font-family:Verdana; }
-        #content {font-family:Arial; }
-        .caption{font-family:Verdana;font-style:italic; font-size:x-small;}
-        h3{font-family:Arial; font-size:small}
-    '''
-
-    # Local variables to extend class
-    baseURL = 'http://slate.com'
-    section_dates = []
-
-    # class extension methods
-    def tag_to_strings(self, tag):
-        if not tag:
-            return ''
-        if isinstance(tag, basestring):
-            return tag
-        strings = []
-        for item in tag.contents:
-            if isinstance(item, (NavigableString, CData)):
-                strings.append(item.string)
-            elif isinstance(item, Tag):
-                res = self.tag_to_string(item,use_alt=False)
-                if res:
-                    strings.append(res)
-        return strings
-
-    def extract_named_sections(self):
-        soup = self.index_to_soup( self.baseURL )
-        soup_nav_bar = soup.find(True, attrs={'id':'nav'})
-        briefing_nav = soup.find('li')
-        briefing_url = briefing_nav.a['href']
-        for section_nav in soup_nav_bar.findAll('li'):
-            section_name = self.tag_to_string(section_nav,use_alt=False)
-            self.section_dates.append(section_name)
-
-        soup = self.index_to_soup(briefing_url)
-
-        self.log("Briefing url = %s " % briefing_url)
-        section_lists = soup.findAll('ul','view_links_list')
-
-        sections = []
-        for section in section_lists :
-            sections.append(section)
-        return sections
-
-    def extract_dated_sections(self):
-        soup = self.index_to_soup( self.baseURL )
-        soup_top_stories = soup.find(True, attrs={'id':'tap3_cntr'})
-        if soup_top_stories:
-            self.section_dates.append("Top Stories")
-            self.log("SELECTION TOP STORIES %s" % "Top Stories")
-
-        soup = soup.find(True, attrs={'id':'toc_links_container'})
-
-        todays_section = soup.find(True, attrs={'class':'todaydateline'})
-        self.section_dates.append(self.tag_to_string(todays_section,use_alt=False))
-        self.log("SELECTION DATE %s" % self.tag_to_string(todays_section,use_alt=False))
-
-        older_section_dates = soup.findAll(True, attrs={'class':'maindateline'})
-        for older_section in older_section_dates :
-            self.section_dates.append(self.tag_to_string(older_section,use_alt=False))
-            self.log("SELECTION DATE %s" % self.tag_to_string(older_section,use_alt=False))
-
-        if soup_top_stories:
-            headline_stories = soup_top_stories
-            self.log("HAVE top_stories")
-        else:
-            headline_stories = None
-            self.log("NO top_stories")
-        section_lists = soup.findAll('ul')
-        # Prepend the headlines to the first section
-        if headline_stories:
-            section_lists.insert(0,headline_stories)
-
-        sections = []
-        for section in section_lists :
-            sections.append(section)
-        return sections
-
-    def extract_section_articles(self, sections_html) :
-        # Find the containers with section content
-        sections = sections_html
-
-        articles = {}
-        key = None
-        ans = []
-
-        for (i,section) in enumerate(sections) :
-
-            # Get the section name
-            if section.has_key('id') :
-                self.log("PROCESSING SECTION id = %s" % section['id'])
-                key = self.section_dates[i]
-                if key.startswith("Pod"):
-                    continue
-                if key.startswith("Blog"):
-                    continue
-                articles[key] = []
-                ans.append(key)
-            elif self.slate_complete:
-                key = self.section_dates[i]
-                if key.startswith("Pod"):
-                    continue
-                if key.startswith("Blog"):
-                    continue
-                self.log("PROCESSING SECTION name = %s" % key)
-                articles[key] = []
-                ans.append(key)
-            else :
-                self.log("SECTION %d HAS NO id" % i);
-                continue
-
-            # Get the section article_list
-            article_list = section.findAll('li')
-
-            # Extract the article attributes
-            for article in article_list :
-                bylines = self.tag_to_strings(article)
-                url = article.a['href']
-                title = bylines[0]
-                full_title = self.tag_to_string(article,use_alt=False)
-                #self.log("ARTICLE TITLE%s" % title)
-                #self.log("ARTICLE FULL_TITLE%s" % full_title)
-                #self.log("URL %s" % url)
-                author = None
-                description = None
-                pubdate = None
-
-                if len(bylines) == 2 and self.tag_to_string(article).find("Today's Papers") > 0 :
-                    description = "A summary of what's in the major U.S. newspapers."
-
-                if len(bylines) == 3 :
-                    author = bylines[2].strip()
-                    author = re.sub('[\r][\n][\t][\t\t]','', author)
-                    author = re.sub(',','', author)
-                    if bylines[1] is not None :
-                        description = bylines[1]
-                        full_byline = self.tag_to_string(article)
-                        if full_byline.find('major U.S. newspapers') > 0 :
-                            description = "A summary of what's in the major U.S. newspapers."
-
-                if len(bylines) > 3 and author is not None:
-                    author += " | "
-                    for (i,substring) in enumerate(bylines[3:]) :
-                        #print "substring: %s" % substring.encode('cp1252')
-                        author += substring.strip()
-                        if i < len(bylines[3:]) :
-                            author += " | "
-
-                # Skip articles whose descriptions contain excluded keywords
-                if description is not None and len(self.excludedDescriptionKeywords):
-                    excluded = re.compile('|'.join(self.excludedDescriptionKeywords))
-                    found_excluded = excluded.search(description)
-                    if found_excluded :
-                        self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
-                        continue
-
-                # Skip articles whose title contain excluded keywords
-                if full_title is not None and len(self.excludedTitleKeywords):
-                    excluded = re.compile('|'.join(self.excludedTitleKeywords))
-                    #self.log("evaluating full_title: %s" % full_title)
-                    found_excluded = excluded.search(full_title)
-                    if found_excluded :
-                        self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
-                        continue
-
-                # Skip articles whose author contain excluded keywords
-                if author is not None and len(self.excludedAuthorKeywords):
-                    excluded = re.compile('|'.join(self.excludedAuthorKeywords))
-                    found_excluded = excluded.search(author)
-                    if found_excluded :
-                        self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
-                        continue
-
-                skip_this_article = False
-                # Check to make sure we're not adding a duplicate
-                for article in articles[key] :
-                    if article['url'] == url :
-                        skip_this_article = True
-                        self.log("SKIPPING DUP %s" % url)
-                        break
-
-                if skip_this_article :
-                    continue
-
-                # Build the dictionary entry for this article
-                feed = key
-                if not articles.has_key(feed) :
-                    articles[feed] = []
-                articles[feed].append(dict(title=title, url=url, date=pubdate, description=description,
|
|
||||||
author=author, content=''))
|
|
||||||
#self.log("KEY %s" % feed)
|
|
||||||
#self.log("APPENDED %s" % url)
|
|
||||||
# Promote 'newspapers' to top
|
|
||||||
for (i,article) in enumerate(articles[feed]) :
|
|
||||||
if article['description'] is not None :
|
|
||||||
if article['description'].find('newspapers') > 0 :
|
|
||||||
articles[feed].insert(0,articles[feed].pop(i))
|
|
||||||
|
|
||||||
|
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
|
||||||
return ans
|
|
||||||
|
|
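The three keyword filters above share one idiom: collapsing a keyword list into a single alternation regex and rejecting any article that matches. A minimal standalone sketch of that pattern follows; the keyword values are illustrative, not taken from the recipe:

import re

excluded_keywords = ['Podcast', 'Video']  # hypothetical values
excluded = re.compile('|'.join(excluded_keywords))
for title in ["Today's Papers", 'Video: The Week in Review']:
    match = excluded.search(title)
    if match:
        # Mirrors the recipe's "skipping ... (keyword exclusion)" log line
        print('skipping %s (matched %s)' % (title, match.group(0)))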
    def print_version(self, url):
-       return url + 'pagenum/all/'
+       return url.replace('.html', '.single.html')

-   # Class methods
    def parse_index(self) :
-       if self.slate_complete:
-           sections = self.extract_named_sections()
-       else:
-           sections = self.extract_dated_sections()
-       section_list = self.extract_section_articles(sections)
-       return section_list
+       ans = []
+       for sectitle, url in (
+               ('News & Politics', '/articles/news_and_politics.html'),
+               ('Technology', '/articles/technology.html'),
+               ('Business', '/articles/business.html'),
+               ('Arts', '/articles/arts.html'),
+               ('Life', '/articles/life.html'),
+               ('Health & Science', '/articles/health_and_science.html'),
+               ('Sports', '/articles/sports.html'),
+               ('Double X', '/articles/double_x.html'),
+               ):
+           url = self.INDEX + url
+           self.log('Found section:', sectitle)
+           articles = self.slate_section_articles(self.index_to_soup(url))
+           if articles:
+               ans.append((sectitle, articles))
+       return ans
+
+   def slate_section_articles(self, soup):
+       cont = soup.find('div', id='most_read')
+       seen = set()
+       ans = []
+       for h4 in cont.findAll('h4'):
+           a = h4.find('a', href=True)
+           if a is None: continue
+           url = a['href']
+           if url.startswith('/'):
+               url = self.INDEX + url
+           if url in seen: continue
+           seen.add(url)
+           title = self.tag_to_string(a)
+           parent = h4.parent
+           h3 = parent.find('h3')
+           desc = ''
+           if h3 is not None:
+               desc = self.tag_to_string(h3)
+           a = parent.find('a', rel='author')
+           if a is not None:
+               a = self.tag_to_string(a)
+           art = {'title':title, 'description':desc, 'date':'', 'url':url}
+           if a:
+               art['author'] = a
+           self.log('\tFound article:', title, ' by ', a)
+           ans.append(art)
+       return ans
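For reference, a calibre recipe's parse_index() is expected to return a list of (section_title, list_of_article_dicts) tuples, which is exactly what the rewritten version above builds. An illustrative sketch of that shape, with invented values:

# Illustrative only: the structure parse_index() returns
ans = [
    ('News & Politics', [
        {'title': 'Example story',
         'url': 'http://www.slate.com/example.html',  # hypothetical URL
         'description': 'One-line teaser', 'date': '', 'author': 'A. Writer'},
    ]),
]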
    def get_masthead_url(self):
        masthead = 'http://img.slate.com/images/redesign2008/slate_logo.gif'
@@ -299,153 +93,4 @@ class Slate(BasicNewsRecipe):
            masthead = None
        return masthead
    def stripAnchors(self, soup):
        body = soup.find('div', attrs={'id':['article_body','content']})
        if body is not None:
            paras = body.findAll('p')
            if paras is not None:
                for para in paras:
                    aTags = para.findAll('a')
                    if aTags is not None:
                        for a in aTags:
                            if a.img is None:
                                #print repr(a.renderContents())
                                a.replaceWith(a.renderContents().decode('utf-8','replace'))
        return soup

    def preprocess_html(self, soup):

        # Remove 'grayPlus4.png' images
        imgs = soup.findAll('img')
        if imgs is not None:
            for img in imgs:
                if re.search("grayPlus4.png", str(img)):
                    img.extract()

        # Delete article based upon content keywords
        if len(self.excludedDescriptionKeywords):
            excluded = re.compile('|'.join(self.excludedContentKeywords))
            found_excluded = excluded.search(str(soup))
            if found_excluded:
                print "No allowed content found, removing article"
                raise Exception('Rejected article')

        # Articles from www.thebigmoney.com use different tagging for byline, dateline and body
        head = soup.find('head')
        if head.link is not None and re.search('www\.thebigmoney\.com', str(head)):
            byline = soup.find('div', attrs={'id':'byline'})
            if byline is not None:
                byline['class'] = byline['id']

            dateline = soup.find('div', attrs={'id':'dateline'})
            if dateline is not None:
                dateline['class'] = dateline['id']

            body = soup.find('div', attrs={'id':'content'})
            if body is not None:
                body['class'] = 'article_body'

            # Synthesize a department kicker
            h3Tag = Tag(soup, 'h3')
            emTag = Tag(soup, 'em')
            emTag.insert(0, NavigableString("the big money: Today's business press"))
            h3Tag.insert(0, emTag)
            soup.body.insert(0, h3Tag)

        # Strip anchors from HTML
        return self.stripAnchors(soup)

    def postprocess_html(self, soup, first_fetch):

        # Fix up dept_kicker as <h3><em>
        dept_kicker = soup.find('div', attrs={'class':'department_kicker'})
        if dept_kicker is not None:
            kicker_strings = self.tag_to_strings(dept_kicker)
            kicker = ''.join(kicker_strings[2:])
            kicker = re.sub('\.', '', kicker)
            h3Tag = Tag(soup, "h3")
            emTag = Tag(soup, "em")
            emTag.insert(0, NavigableString(kicker))
            h3Tag.insert(0, emTag)
            dept_kicker.replaceWith(h3Tag)
        else:
            self.log("No kicker--return null")
            return None

        # Fix up the concatenated byline and dateline
        byline = soup.find(True, attrs={'class':'byline'})
        if byline is not None:
            bylineTag = Tag(soup, 'div')
            bylineTag['class'] = 'byline'
            #bylineTag['height'] = '0em'
            bylineTag.insert(0, self.tag_to_string(byline))
            byline.replaceWith(bylineTag)

        dateline = soup.find(True, attrs={'class':'dateline'})
        if dateline is not None:
            datelineTag = Tag(soup, 'div')
            datelineTag['class'] = 'dateline'
            #datelineTag['margin-top'] = '0em'
            datelineTag.insert(0, self.tag_to_string(dateline))
            dateline.replaceWith(datelineTag)

        # Change captions to italic, add <hr>
        for caption in soup.findAll(True, {'class':'caption'}):
            if caption is not None:
                emTag = Tag(soup, "em")
                emTag.insert(0, '<br />' + self.tag_to_string(caption))
                hrTag = Tag(soup, 'hr')
                emTag.insert(1, hrTag)
                caption.replaceWith(emTag)

        # Fix photos
        for photo in soup.findAll('span', attrs={'class':'imagewrapper'}):
            if photo.a is not None and photo.a.img is not None:
                divTag = Tag(soup, 'div')
                divTag['class'] = 'imagewrapper'
                divTag.insert(0, photo.a.img)
                photo.replaceWith(divTag)

        return soup

    def postprocess_book(self, oeb, opts, log):

        def extract_byline(href):
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            byline = soup.find(True, attrs={'class':'byline'})
            if byline is not None:
                return self.tag_to_string(byline, use_alt=False)
            else:
                return None

        def extract_description(href):
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            paragraphs = soup.findAll('p')
            for p in paragraphs:
                if self.tag_to_string(p, use_alt=False).startswith('By ') or \
                   self.tag_to_string(p, use_alt=False).startswith('Posted '):
                    continue
                comment = p.find(text=lambda text: isinstance(text, Comment))
                if comment is not None:
                    continue
                else:
                    return self.tag_to_string(p, use_alt=False)[:self.summary_length] + '...'

            return None

        # Method entry point here
        # Single section toc looks different than multi-section tocs
        if oeb.toc.depth() == 2:
            for article in oeb.toc:
                if article.author is None:
                    article.author = extract_byline(article.href)
                if article.description is None:
                    article.description = extract_description(article.href)
        elif oeb.toc.depth() == 3:
            for section in oeb.toc:
                for article in section:
                    if article.author is None:
                        article.author = extract_byline(article.href)
                    if article.description is None:
                        article.description = extract_description(article.href)
17  recipes/wow.recipe  Normal file
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class WoW(BasicNewsRecipe):
    title = u'WoW Insider'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('WoW',
         'http://wow.joystiq.com/rss.xml')
    ]
@@ -224,6 +224,9 @@ try:
 except:
     try:
         HOST=get_ip_address('wlan0')
     except:
-        HOST='192.168.1.2'
+        try:
+            HOST=get_ip_address('ppp0')
+        except:
+            HOST='192.168.1.2'
@@ -336,7 +336,7 @@ class Build(Command):
        oinc = ['/Fo'+obj] if iswindows else ['-o', obj]
        cmd = [compiler] + cflags + ext.cflags + einc + sinc + oinc
        self.info(' '.join(cmd))
-       subprocess.check_call(cmd)
+       self.check_call(cmd)

        dest = self.dest(ext)
        elib = self.lib_dirs_to_ldflags(ext.lib_dirs)
@@ -350,18 +350,32 @@ class Build(Command):
        else:
            cmd += objects + ext.extra_objs + ['-o', dest] + ldflags + ext.ldflags + elib + xlib
        self.info('\n\n', ' '.join(cmd), '\n\n')
-       subprocess.check_call(cmd)
+       self.check_call(cmd)
        if iswindows:
            #manifest = dest+'.manifest'
            #cmd = [MT, '-manifest', manifest, '-outputresource:%s;2'%dest]
            #self.info(*cmd)
-           #subprocess.check_call(cmd)
+           #self.check_call(cmd)
            #os.remove(manifest)
            for x in ('.exp', '.lib'):
                x = os.path.splitext(dest)[0]+x
                if os.path.exists(x):
                    os.remove(x)

+   def check_call(self, *args, **kwargs):
+       """print cmdline if an error occurred
+
+       If something is missing (qmake e.g.) you get a non-informative error
+        self.check_call(qmc + [ext.name+'.pro'])
+       so you would have to look at the source to see the actual command.
+       """
+       try:
+           subprocess.check_call(*args, **kwargs)
+       except:
+           cmdline = ' '.join(['"%s"' % (arg) if ' ' in arg else arg for arg in args[0]])
+           print "Error while executing: %s\n" % (cmdline)
+           raise
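The new check_call() wrapper is behaviour-preserving on success; its only job is to echo the exact command line before re-raising, so a failure caused by a missing tool such as qmake becomes diagnosable. A rough standalone sketch of the same idea, with a hypothetical sample command:

import subprocess

def check_call(*args, **kwargs):
    # Print the command that failed, then re-raise the original error
    try:
        subprocess.check_call(*args, **kwargs)
    except:
        cmdline = ' '.join(['"%s"' % (arg) if ' ' in arg else arg for arg in args[0]])
        print('Error while executing: %s' % (cmdline))
        raise

check_call(['echo', 'hello world'])  # hypothetical usage; quoting kicks in for the spaced argument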
    def build_qt_objects(self, ext):
        obj_pat = 'release\\*.obj' if iswindows else '*.o'
        objects = glob.glob(obj_pat)
@@ -380,8 +394,8 @@ class Build(Command):
        qmc = [QMAKE, '-o', 'Makefile']
        if iswindows:
            qmc += ['-spec', 'win32-msvc2008']
-       subprocess.check_call(qmc + [ext.name+'.pro'])
-       subprocess.check_call([make, '-f', 'Makefile'])
+       self.check_call(qmc + [ext.name+'.pro'])
+       self.check_call([make, '-f', 'Makefile'])
        objects = glob.glob(obj_pat)
        return list(map(self.a, objects))
|
|||||||
cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\
|
cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\
|
||||||
pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf]
|
pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf]
|
||||||
self.info(' '.join(cmd))
|
self.info(' '.join(cmd))
|
||||||
subprocess.check_call(cmd)
|
self.check_call(cmd)
|
||||||
module = self.j(src_dir, self.b(dest))
|
module = self.j(src_dir, self.b(dest))
|
||||||
if self.newer(dest, [sbf]+qt_objects):
|
if self.newer(dest, [sbf]+qt_objects):
|
||||||
mf = self.j(src_dir, 'Makefile')
|
mf = self.j(src_dir, 'Makefile')
|
||||||
@ -417,7 +431,7 @@ class Build(Command):
|
|||||||
makefile.extra_include_dirs = ext.inc_dirs
|
makefile.extra_include_dirs = ext.inc_dirs
|
||||||
makefile.generate()
|
makefile.generate()
|
||||||
|
|
||||||
subprocess.check_call([make, '-f', mf], cwd=src_dir)
|
self.check_call([make, '-f', mf], cwd=src_dir)
|
||||||
shutil.copy2(module, dest)
|
shutil.copy2(module, dest)
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
@ -457,7 +471,7 @@ class BuildPDF2XML(Command):
|
|||||||
cmd += ['-I'+x for x in poppler_inc_dirs+magick_inc_dirs]
|
cmd += ['-I'+x for x in poppler_inc_dirs+magick_inc_dirs]
|
||||||
cmd += ['/Fo'+obj, src]
|
cmd += ['/Fo'+obj, src]
|
||||||
self.info(*cmd)
|
self.info(*cmd)
|
||||||
subprocess.check_call(cmd)
|
self.check_call(cmd)
|
||||||
objects.append(obj)
|
objects.append(obj)
|
||||||
|
|
||||||
if self.newer(dest, objects):
|
if self.newer(dest, objects):
|
||||||
@ -470,7 +484,7 @@ class BuildPDF2XML(Command):
|
|||||||
png_libs+magick_libs+poppler_libs+ft_libs+jpg_libs+pdfreflow_libs]
|
png_libs+magick_libs+poppler_libs+ft_libs+jpg_libs+pdfreflow_libs]
|
||||||
cmd += ['/OUT:'+dest] + objects
|
cmd += ['/OUT:'+dest] + objects
|
||||||
self.info(*cmd)
|
self.info(*cmd)
|
||||||
subprocess.check_call(cmd)
|
self.check_call(cmd)
|
||||||
|
|
||||||
self.info('Binary installed as', dest)
|
self.info('Binary installed as', dest)
|
||||||
|
|
||||||
|
@@ -20,17 +20,23 @@ for x in [
    EXCLUDES.extend(['--exclude', x])
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]

+def get_rsync_pw():
+   return open('/home/kovid/work/kde/conf/buildbot').read().partition(
+           ':')[-1].strip()
+
class Rsync(Command):

    description = 'Sync source tree from development machine'

    SYNC_CMD = ' '.join(BASE_RSYNC+SAFE_EXCLUDES+
-           ['rsync://{host}/work/{project}', '..'])
+           ['rsync://buildbot@{host}/work/{project}', '..'])

    def run(self, opts):
        cmd = self.SYNC_CMD.format(host=HOST, project=PROJECT)
+       env = dict(os.environ)
+       env['RSYNC_PASSWORD'] = get_rsync_pw()
        self.info(cmd)
-       subprocess.check_call(cmd, shell=True)
+       subprocess.check_call(cmd, shell=True, env=env)


class Push(Command):
@@ -81,7 +87,8 @@ class VMInstaller(Command):

    def get_build_script(self):
-       ans = '\n'.join(self.BUILD_PREFIX)+'\n\n'
+       rs = ['export RSYNC_PASSWORD=%s'%get_rsync_pw()]
+       ans = '\n'.join(self.BUILD_PREFIX + rs)+'\n\n'
        ans += ' && \\\n'.join(self.BUILD_RSYNC) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_CLEAN) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_BUILD) + ' && \\\n'
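Taken together, the two hunks above keep the rsync credential off the command line: the Rsync command injects RSYNC_PASSWORD into the child environment, and the VM build script exports the same variable. A small sketch of the environment approach, assuming a password file in the user:password format used above (the file path and host are hypothetical):

import os, subprocess

env = dict(os.environ)
# rsync consults RSYNC_PASSWORD for daemon authentication instead of prompting
env['RSYNC_PASSWORD'] = open('/path/to/password_file').read().partition(':')[-1].strip()
subprocess.check_call('rsync rsync://buildbot@example-host/work/project ..',
        shell=True, env=env)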
@@ -278,6 +278,8 @@ def get_proxies(debug=True):
            continue
        if proxy.startswith(key+'://'):
            proxy = proxy[len(key)+3:]
+           if key == 'https' and proxy.startswith('http://'):
+               proxy = proxy[7:]
            if proxy.endswith('/'):
                proxy = proxy[:-1]
            if len(proxy) > 4:
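The new branch handles environments where the https proxy variable is set to an http:// URL; after stripping the scheme, the remainder is treated as host:port like every other entry. A quick sketch of the normalization the loop performs, with sample values that are illustrative only:

proxies = {'http': 'http://proxy.example.com:3128/',
           'https': 'http://proxy.example.com:3128/'}  # sample values
for key, proxy in list(proxies.items()):
    if proxy.startswith(key + '://'):
        proxy = proxy[len(key) + 3:]
    if key == 'https' and proxy.startswith('http://'):
        proxy = proxy[7:]   # an https proxy given as an http:// URL
    if proxy.endswith('/'):
        proxy = proxy[:-1]
    proxies[key] = proxy
print(proxies)  # {'http': 'proxy.example.com:3128', 'https': 'proxy.example.com:3128'}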
@@ -502,6 +502,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
# }}}

from calibre.ebooks.comic.input import ComicInput
+from calibre.ebooks.djvu.input import DJVUInput
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
@@ -555,7 +556,8 @@ from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
-from calibre.devices.prs505.driver import PRS505, PRST1
+from calibre.devices.prs505.driver import PRS505
+from calibre.devices.prst1.driver import PRST1
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60, WEBOS
from calibre.devices.nokia.driver import N770, N810, E71X, E52
@@ -599,6 +601,7 @@ plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]

plugins += [
    ComicInput,
+   DJVUInput,
    EPUBInput,
    FB2Input,
    HTMLInput,
@@ -1143,6 +1146,16 @@ class StoreAmazonDEKindleStore(StoreBase):
    formats = ['KINDLE']
    affiliate = True

+class StoreAmazonFRKindleStore(StoreBase):
+   name = 'Amazon FR Kindle'
+   author = 'Charles Haley'
+   description = u'Tous les ebooks Kindle'
+   actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore'
+
+   headquarters = 'DE'
+   formats = ['KINDLE']
+   affiliate = True
+
class StoreAmazonUKKindleStore(StoreBase):
    name = 'Amazon UK Kindle'
    author = 'Charles Haley'
@@ -1520,6 +1533,7 @@ plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
    StoreAmazonDEKindleStore,
+   StoreAmazonFRKindleStore,
    StoreAmazonUKKindleStore,
    StoreBaenWebScriptionStore,
    StoreBNStore,
@@ -4,7 +4,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

-import sys
from itertools import izip
from xml.sax.saxutils import escape

@@ -217,7 +217,7 @@ class DevicePlugin(Plugin):
        '''
        Unix version of :meth:`can_handle_windows`

-       :param device_info: Is a tupe of (vid, pid, bcd, manufacturer, product,
+       :param device_info: Is a tuple of (vid, pid, bcd, manufacturer, product,
        serial number)

        '''
@@ -414,7 +414,8 @@ class DevicePlugin(Plugin):
    @classmethod
    def config_widget(cls):
        '''
-       Should return a QWidget. The QWidget contains the settings for the device interface
+       Should return a QWidget. The QWidget contains the settings for the
+       device interface
        '''
        raise NotImplementedError()

@@ -429,8 +430,9 @@ class DevicePlugin(Plugin):
    @classmethod
    def settings(cls):
        '''
-       Should return an opts object. The opts object should have at least one attribute
-       `format_map` which is an ordered list of formats for the device.
+       Should return an opts object. The opts object should have at least one
+       attribute `format_map` which is an ordered list of formats for the
+       device.
        '''
        raise NotImplementedError()

@@ -516,3 +518,9 @@ class BookList(list):
        '''
        raise NotImplementedError()
+
+   def prepare_addable_books(self, paths):
+       '''
+       Given a list of paths, returns another list of paths. These paths
+       point to addable versions of the books.
+       '''
+       return paths
@@ -299,34 +299,3 @@ class PRS505(USBMS):
                    f.write(metadata.thumbnail[-1])
            debug_print('Cover uploaded to: %r'%cpath)

-class PRST1(USBMS):
-   name = 'SONY PRST1 and newer Device Interface'
-   gui_name = 'SONY Reader'
-   description = _('Communicate with Sony PRST1 and newer eBook readers')
-   author = 'Kovid Goyal'
-   supported_platforms = ['windows', 'osx', 'linux']
-
-   FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
-   VENDOR_ID = [0x054c]   #: SONY Vendor Id
-   PRODUCT_ID = [0x05c2]
-   BCD = [0x226]
-
-   VENDOR_NAME = 'SONY'
-   WINDOWS_MAIN_MEM = re.compile(
-           r'(PRS-T1&)'
-           )
-
-   THUMBNAIL_HEIGHT = 217
-   SCAN_FROM_ROOT = True
-   EBOOK_DIR_MAIN = __appname__
-
-   def windows_filter_pnp_id(self, pnp_id):
-       return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id
-
-   def get_carda_ebook_dir(self, for_upload=False):
-       if for_upload:
-           return __appname__
-       return self.EBOOK_DIR_CARD_A
src/calibre/devices/prst1/__init__.py
Normal file
7
src/calibre/devices/prst1/__init__.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
481  src/calibre/devices/prst1/driver.py  Normal file
@@ -0,0 +1,481 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Device driver for the SONY T1 devices
'''

import os, time, re
import sqlite3 as sqlite
from contextlib import closing

from calibre.devices.usbms.driver import USBMS, debug_print
from calibre.devices.usbms.device import USBDevice
from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import BookList
from calibre.ebooks.metadata import authors_to_sort_string
from calibre.constants import islinux

DBPATH = 'Sony_Reader/database/books.db'
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'

class ImageWrapper(object):
    def __init__(self, image_path):
        self.image_path = image_path

class PRST1(USBMS):
    name           = 'SONY PRST1 and newer Device Interface'
    gui_name       = 'SONY Reader'
    description    = _('Communicate with the PRST1 and newer SONY eBook readers')
    author         = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']
    path_sep = '/'
    booklist_class = CollectionsBookList

    FORMATS      = ['epub', 'pdf', 'txt']
    CAN_SET_METADATA = ['collections']
    CAN_DO_DEVICE_DB_PLUGBOARD = True

    VENDOR_ID    = [0x054c]   #: SONY Vendor Id
    PRODUCT_ID   = [0x05c2]
    BCD          = [0x226]

    VENDOR_NAME        = 'SONY'
    WINDOWS_MAIN_MEM   = re.compile(
            r'(PRS-T1&)'
            )
    WINDOWS_CARD_A_MEM = re.compile(
            r'(PRS-T1__SD&)'
            )
    MAIN_MEMORY_VOLUME_LABEL  = 'SONY Reader Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'

    THUMBNAIL_HEIGHT = 144
    SUPPORTS_SUB_DIRS = True
    SUPPORTS_USE_AUTHOR_SORT = True
    MUST_READ_METADATA = True
    EBOOK_DIR_MAIN = 'Sony_Reader/media/books'

    EXTRA_CUSTOMIZATION_MESSAGE = [
        _('Comma separated list of metadata fields '
            'to turn into collections on the device. Possibilities include: ')+\
                    'series, tags, authors',
        _('Upload separate cover thumbnails for books') +
            ':::'+_('Normally, the SONY readers get the cover image from the'
                ' ebook file itself. With this option, calibre will send a '
                'separate cover image to the reader, useful if you are '
                'sending DRMed books in which you cannot change the cover.'),
        _('Refresh separate covers when using automatic management') +
            ':::' +
            _('Set this option to have separate book covers uploaded '
                'every time you connect your device. Unset this option if '
                'you have so many books on the reader that performance is '
                'unacceptable.'),
        _('Preserve cover aspect ratio when building thumbnails') +
            ':::' +
            _('Set this option if you want the cover thumbnails to have '
                'the same aspect ratio (width to height) as the cover. '
                'Unset it if you want the thumbnail to be the maximum size, '
                'ignoring aspect ratio.'),
    ]
    EXTRA_CUSTOMIZATION_DEFAULT = [
                ', '.join(['series', 'tags']),
                True,
                False,
                True,
    ]

    OPT_COLLECTIONS    = 0
    OPT_UPLOAD_COVERS  = 1
    OPT_REFRESH_COVERS = 2
    OPT_PRESERVE_ASPECT_RATIO = 3

    plugboards = None
    plugboard_func = None

    def post_open_callback(self):
        # Set the thumbnail width to the theoretical max if the user has asked
        # that we do not preserve aspect ratio
        if not self.settings().extra_customization[self.OPT_PRESERVE_ASPECT_RATIO]:
            self.THUMBNAIL_WIDTH = 108

    def windows_filter_pnp_id(self, pnp_id):
        return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id

    def get_carda_ebook_dir(self, for_upload=False):
        if for_upload:
            return self.EBOOK_DIR_MAIN
        return self.EBOOK_DIR_CARD_A

    def get_main_ebook_dir(self, for_upload=False):
        if for_upload:
            return self.EBOOK_DIR_MAIN
        return ''

    def can_handle(self, devinfo, debug=False):
        if islinux:
            dev = USBDevice(devinfo)
            main, carda, cardb = self.find_device_nodes(detected_device=dev)
            if main is None and carda is None and cardb is None:
                if debug:
                    print ('\tPRS-T1: Appears to be in non data mode'
                            ' or was ejected, ignoring')
                return False
        return True

    def books(self, oncard=None, end_session=True):
        dummy_bl = BookList(None, None, None)

        if (
                (oncard == 'carda' and not self._card_a_prefix) or
                (oncard and oncard != 'carda')
            ):
            self.report_progress(1.0, _('Getting list of books on device...'))
            return dummy_bl

        prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix

        # Let parent driver get the books
        self.booklist_class.rebuild_collections = self.rebuild_collections
        bl = USBMS.books(self, oncard=oncard, end_session=end_session)

        dbpath = self.normalize_path(prefix + DBPATH)
        debug_print("SQLite DB Path: " + dbpath)

        with closing(sqlite.connect(dbpath)) as connection:
            # Replace undecodable characters in the db instead of erroring out
            connection.text_factory = lambda x: unicode(x, "utf-8", "replace")

            cursor = connection.cursor()
            # Query collections
            query = '''
                SELECT books._id, collection.title
                    FROM collections
                    LEFT OUTER JOIN books
                    LEFT OUTER JOIN collection
                    WHERE collections.content_id = books._id AND
                    collections.collection_id = collection._id
                '''
            cursor.execute(query)

            bl_collections = {}
            for i, row in enumerate(cursor):
                bl_collections.setdefault(row[0], [])
                bl_collections[row[0]].append(row[1])

            for idx, book in enumerate(bl):
                query = 'SELECT _id, thumbnail FROM books WHERE file_path = ?'
                t = (book.lpath,)
                cursor.execute(query, t)

                for i, row in enumerate(cursor):
                    book.device_collections = bl_collections.get(row[0], None)
                    thumbnail = row[1]
                    if thumbnail is not None:
                        thumbnail = self.normalize_path(prefix + thumbnail)
                        book.thumbnail = ImageWrapper(thumbnail)

            cursor.close()

        return bl

    def set_plugboards(self, plugboards, pb_func):
        self.plugboards = plugboards
        self.plugboard_func = pb_func

    def sync_booklists(self, booklists, end_session=True):
        debug_print('PRST1: starting sync_booklists')

        opts = self.settings()
        if opts.extra_customization:
            collections = [x.strip() for x in
                    opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
        else:
            collections = []
        debug_print('PRST1: collection fields:', collections)

        if booklists[0] is not None:
            self.update_device_database(booklists[0], collections, None)
        if booklists[1] is not None:
            self.update_device_database(booklists[1], collections, 'carda')

        USBMS.sync_booklists(self, booklists, end_session=end_session)
        debug_print('PRST1: finished sync_booklists')

    def update_device_database(self, booklist, collections_attributes, oncard):
        debug_print('PRST1: starting update_device_database')

        plugboard = None
        if self.plugboard_func:
            plugboard = self.plugboard_func(self.__class__.__name__,
                    'device_db', self.plugboards)
            debug_print("PRST1: Using Plugboard", plugboard)

        prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix
        if prefix is None:
            # Reader has no sd card inserted
            return
        source_id = 1 if oncard == 'carda' else 0

        dbpath = self.normalize_path(prefix + DBPATH)
        debug_print("SQLite DB Path: " + dbpath)

        collections = booklist.get_collections(collections_attributes)

        with closing(sqlite.connect(dbpath)) as connection:
            self.update_device_books(connection, booklist, source_id, plugboard)
            self.update_device_collections(connection, booklist, collections, source_id)

        debug_print('PRST1: finished update_device_database')

    def update_device_books(self, connection, booklist, source_id, plugboard):
        opts = self.settings()
        upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
        refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]

        cursor = connection.cursor()

        # Get existing books
        query = 'SELECT file_path, _id FROM books'
        cursor.execute(query)

        db_books = {}
        for i, row in enumerate(cursor):
            lpath = row[0].replace('\\', '/')
            db_books[lpath] = row[1]

        for book in booklist:
            # Run through plugboard if needed
            if plugboard is not None:
                newmi = book.deepcopy_metadata()
                newmi.template_to_attribute(book, plugboard)
            else:
                newmi = book

            # Get Metadata We Want
            lpath = book.lpath
            try:
                if opts.use_author_sort:
                    if newmi.author_sort:
                        author = newmi.author_sort
                    else:
                        author = authors_to_sort_string(newmi.authors)
                else:
                    author = newmi.authors[0]
            except:
                author = _('Unknown')
            title = newmi.title or _('Unknown')

            # Get modified date
            modified_date = os.path.getmtime(book.path)
            time_offset = time.altzone if time.daylight else time.timezone
            modified_date = (modified_date - time_offset) * 1000

            if lpath not in db_books:
                query = '''
                    INSERT INTO books
                    (title, author, source_id, added_date, modified_date,
                    file_path, file_name, file_size, mime_type, corrupted,
                    prevent_delete)
                    values (?,?,?,?,?,?,?,?,?,0,0)
                    '''
                t = (title, author, source_id, int(time.time() * 1000),
                        modified_date, lpath,
                        os.path.basename(lpath), book.size, book.mime)
                cursor.execute(query, t)
                book.bookId = cursor.lastrowid
                if upload_covers:
                    self.upload_book_cover(connection, book, source_id)
                debug_print('Inserted New Book: ' + book.title)
            else:
                query = '''
                    UPDATE books
                    SET title = ?, author = ?, modified_date = ?, file_size = ?
                    WHERE file_path = ?
                    '''
                t = (title, author, modified_date, book.size, lpath)
                cursor.execute(query, t)
                book.bookId = db_books[lpath]
                if refresh_covers:
                    self.upload_book_cover(connection, book, source_id)
                db_books[lpath] = None

        for book, bookId in db_books.items():
            if bookId is not None:
                # Remove From Collections
                query = 'DELETE FROM collections WHERE content_id = ?'
                t = (bookId,)
                cursor.execute(query, t)
                # Remove from Books
                query = 'DELETE FROM books where _id = ?'
                t = (bookId,)
                cursor.execute(query, t)
                debug_print('Deleted Book:' + book)

        connection.commit()
        cursor.close()

    def update_device_collections(self, connection, booklist, collections,
            source_id):
        cursor = connection.cursor()

        if collections:
            # Get existing collections
            query = 'SELECT _id, title FROM collection'
            cursor.execute(query)

            db_collections = {}
            for i, row in enumerate(cursor):
                db_collections[row[1]] = row[0]

            for collection, books in collections.items():
                if collection not in db_collections:
                    query = 'INSERT INTO collection (title, source_id) VALUES (?,?)'
                    t = (collection, source_id)
                    cursor.execute(query, t)
                    db_collections[collection] = cursor.lastrowid
                    debug_print('Inserted New Collection: ' + collection)

                # Get existing books in collection
                query = '''
                    SELECT books.file_path, content_id
                    FROM collections
                    LEFT OUTER JOIN books
                    WHERE collection_id = ? AND books._id = collections.content_id
                    '''
                t = (db_collections[collection],)
                cursor.execute(query, t)

                db_books = {}
                for i, row in enumerate(cursor):
                    db_books[row[0]] = row[1]

                for idx, book in enumerate(books):
                    if collection not in book.device_collections:
                        book.device_collections.append(collection)
                    if db_books.get(book.lpath, None) is None:
                        query = '''
                            INSERT INTO collections (collection_id, content_id,
                            added_order) values (?,?,?)
                            '''
                        t = (db_collections[collection], book.bookId, idx)
                        cursor.execute(query, t)
                        debug_print('Inserted Book Into Collection: ' +
                                book.title + ' -> ' + collection)
                    else:
                        query = '''
                            UPDATE collections
                            SET added_order = ?
                            WHERE content_id = ? AND collection_id = ?
                            '''
                        t = (idx, book.bookId, db_collections[collection])
                        cursor.execute(query, t)

                    db_books[book.lpath] = None

                for bookPath, bookId in db_books.items():
                    if bookId is not None:
                        query = ('DELETE FROM collections '
                                'WHERE content_id = ? AND collection_id = ? ')
                        t = (bookId, db_collections[collection],)
                        cursor.execute(query, t)
                        debug_print('Deleted Book From Collection: ' + bookPath
                                + ' -> ' + collection)

                db_collections[collection] = None

            for collection, collectionId in db_collections.items():
                if collectionId is not None:
                    # Remove Books from Collection
                    query = ('DELETE FROM collections '
                            'WHERE collection_id = ?')
                    t = (collectionId,)
                    cursor.execute(query, t)
                    # Remove Collection
                    query = ('DELETE FROM collection '
                            'WHERE _id = ?')
                    t = (collectionId,)
                    cursor.execute(query, t)
                    debug_print('Deleted Collection: ' + collection)

        connection.commit()
        cursor.close()

    def rebuild_collections(self, booklist, oncard):
        debug_print('PRST1: starting rebuild_collections')

        opts = self.settings()
        if opts.extra_customization:
            collections = [x.strip() for x in
                    opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
        else:
            collections = []
        debug_print('PRST1: collection fields:', collections)

        self.update_device_database(booklist, collections, oncard)

        debug_print('PRS-T1: finished rebuild_collections')

    def upload_cover(self, path, filename, metadata, filepath):
        debug_print('PRS-T1: uploading cover')

        if filepath.startswith(self._main_prefix):
            prefix = self._main_prefix
            source_id = 0
        else:
            prefix = self._card_a_prefix
            source_id = 1

        metadata.lpath = filepath.partition(prefix)[2]
        metadata.lpath = metadata.lpath.replace('\\', '/')
        dbpath = self.normalize_path(prefix + DBPATH)
        debug_print("SQLite DB Path: " + dbpath)

        with closing(sqlite.connect(dbpath)) as connection:
            cursor = connection.cursor()

            query = 'SELECT _id FROM books WHERE file_path = ?'
            t = (metadata.lpath,)
            cursor.execute(query, t)

            for i, row in enumerate(cursor):
                metadata.bookId = row[0]

            cursor.close()

            if getattr(metadata, 'bookId', None) is not None:
                debug_print('PRS-T1: refreshing cover for book being sent')
                self.upload_book_cover(connection, metadata, source_id)

        debug_print('PRS-T1: done uploading cover')

    def upload_book_cover(self, connection, book, source_id):
        debug_print('PRST1: Uploading/Refreshing Cover for ' + book.title)
        if not book.thumbnail or not book.thumbnail[-1]:
            return
        cursor = connection.cursor()

        thumbnail_path = THUMBPATH%book.bookId

        prefix = self._main_prefix if source_id is 0 else self._card_a_prefix
        thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/'))
        thumbnail_dir_path = os.path.dirname(thumbnail_file_path)
        if not os.path.exists(thumbnail_dir_path):
            os.makedirs(thumbnail_dir_path)

        with open(thumbnail_file_path, 'wb') as f:
            f.write(book.thumbnail[-1])

        query = 'UPDATE books SET thumbnail = ? WHERE _id = ?'
        t = (thumbnail_path, book.bookId,)
        cursor.execute(query, t)

        connection.commit()
        cursor.close()
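One subtlety in update_device_books() above: the Reader's books table stores timestamps as milliseconds since the epoch shifted to local time, so the driver subtracts the UTC offset before multiplying by 1000. A standalone sketch of that conversion, using a hypothetical file path:

import os, time

path = '/tmp/example.epub'  # hypothetical file
open(path, 'wb').close()

mtime = os.path.getmtime(path)                      # seconds since epoch, UTC
offset = time.altzone if time.daylight else time.timezone
modified_date = (mtime - offset) * 1000             # local-time milliseconds, as the db expects
print(int(modified_date))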
@@ -483,7 +483,7 @@ class Device(DeviceConfig, DevicePlugin):
        self._card_a_prefix = get_card_prefix('carda')
        self._card_b_prefix = get_card_prefix('cardb')

-   def find_device_nodes(self):
+   def find_device_nodes(self, detected_device=None):

        def walk(base):
            base = os.path.abspath(os.path.realpath(base))
@@ -507,8 +507,11 @@ class Device(DeviceConfig, DevicePlugin):
        d, j = os.path.dirname, os.path.join
        usb_dir = None

+       if detected_device is None:
+           detected_device = self.detected_device
+
        def test(val, attr):
-           q = getattr(self.detected_device, attr)
+           q = getattr(detected_device, attr)
            return q == val

        for x, isfile in walk('/sys/devices'):
@@ -596,6 +599,8 @@ class Device(DeviceConfig, DevicePlugin):
            label = self.STORAGE_CARD2_VOLUME_LABEL
        if not label:
            label = self.STORAGE_CARD_VOLUME_LABEL + ' 2'
+       if not label:
+           label = 'E-book Reader (%s)'%type
        extra = 0
        while True:
            q = ' (%d)'%extra if extra else ''
12  src/calibre/ebooks/djvu/__init__.py  Normal file
@@ -0,0 +1,12 @@
#!/usr/bin/env python
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <anthon@mnt.org>'
__docformat__ = 'restructuredtext en'

'''
Used for DJVU input
'''
146  src/calibre/ebooks/djvu/djvu.py  Normal file
@@ -0,0 +1,146 @@
#! /usr/bin/env python
# coding: utf-8
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'

# this code is based on:
# Lizardtech DjVu Reference
# DjVu v3
# November 2005

import sys
import struct
from cStringIO import StringIO

from .djvubzzdec import BZZDecoder

class DjvuChunk(object):
    def __init__(self, buf, start, end, align=True, bigendian=True,
            inclheader=False, verbose=0):
        self.subtype = None
        self._subchunks = []
        self.buf = buf
        pos = start + 4
        self.type = buf[start:pos]
        self.align = align  # whether to align to word (2-byte) boundaries
        self.headersize = 0 if inclheader else 8
        if bigendian:
            self.strflag = b'>'
        else:
            self.strflag = b'<'
        oldpos, pos = pos, pos+4
        self.size = struct.unpack(self.strflag+b'L', buf[oldpos:pos])[0]
        self.dataend = pos + self.size - (8 if inclheader else 0)
        if self.type == b'FORM':
            oldpos, pos = pos, pos+4
            #print oldpos, pos
            self.subtype = buf[oldpos:pos]
            #self.headersize += 4
        self.datastart = pos
        if verbose > 0:
            print ('found', self.type, self.subtype, pos, self.size)
        if self.type in b'FORM'.split():
            if verbose > 0:
                print ('processing substuff %d %d (%x)' % (pos, self.dataend,
                    self.dataend))
            numchunks = 0
            while pos < self.dataend:
                x = DjvuChunk(buf, pos, start+self.size, verbose=verbose)
                numchunks += 1
                self._subchunks.append(x)
                newpos = pos + x.size + x.headersize + (1 if (x.size % 2) else 0)
                if verbose > 0:
                    print ('newpos %d %d (%x, %x) %d' % (newpos, self.dataend,
                        newpos, self.dataend, x.headersize))
                pos = newpos
            if verbose > 0:
                print ('  end of chunk %d (%x)' % (pos, pos))

    def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100):
        if out:
            out.write(b'  ' * indent)
            out.write(b'%s%s [%d]\n' % (self.type,
                b':' + self.subtype if self.subtype else b'', self.size))
        if txtout and self.type == b'TXTz':
            inbuf = StringIO(self.buf[self.datastart: self.dataend])
            outbuf = StringIO()
            decoder = BZZDecoder(inbuf, outbuf)
            while True:
                xxres = decoder.convert(1024 * 1024)
                if not xxres:
                    break
            res = outbuf.getvalue()
            l = 0
            for x in res[:3]:
                l <<= 8
                l += ord(x)
            if verbose > 0 and out:
                print >> out, l
            txtout.write(res[3:3+l])
            txtout.write(b'\n\f')
        if txtout and self.type == b'TXTa':
            res = self.buf[self.datastart: self.dataend]
            l = 0
            for x in res[:3]:
                l <<= 8
                l += ord(x)
            if verbose > 0 and out:
                print >> out, l
            txtout.write(res[3:3+l])
            txtout.write(b'\n\f')
        if indent >= maxlevel:
            return
        for schunk in self._subchunks:
            schunk.dump(verbose=verbose, indent=indent+1, out=out, txtout=txtout)

class DJVUFile(object):
    def __init__(self, instream, verbose=0):
        self.instream = instream
        buf = self.instream.read(4)
        assert(buf == b'AT&T')
        buf = self.instream.read()
        self.dc = DjvuChunk(buf, 0, len(buf), verbose=verbose)

    def get_text(self, outfile=None):
        self.dc.dump(txtout=outfile)

    def dump(self, outfile=None, maxlevel=0):
        self.dc.dump(out=outfile, maxlevel=maxlevel)

def main():
    from ruamel.util.program import Program
    class DJVUDecoder(Program):
        def __init__(self):
            Program.__init__(self)

        def parser_setup(self):
            Program.parser_setup(self)
            #self._argparser.add_argument('--combine', '-c', action=CountAction, const=1, nargs=0)
            #self._argparser.add_argument('--combine', '-c', type=int, default=1)
            #self._argparser.add_argument('--segments', '-s', action='append', nargs='+')
            #self._argparser.add_argument('--force', '-f', action='store_true')
            #self._argparser.add_argument('classname')
            self._argparser.add_argument('--text', '-t', action='store_true')
            self._argparser.add_argument('--dump', type=int, default=0)
            self._argparser.add_argument('file', nargs='+')

        def run(self):
            if self._args.verbose > 1:  # can be negative with --quiet
                print (self._args.file)
            x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose)
            if self._args.text:
                print (x.get_text(sys.stdout))
            if self._args.dump:
                x.dump(sys.stdout, maxlevel=self._args.dump)
            return 0

    tt = DJVUDecoder()
    res = tt.result
    if res != 0:
        print (res)

if __name__ == '__main__':
    main()
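In DjvuChunk.dump() above, both TXT chunk flavours (TXTz after BZZ decompression, TXTa as-is) start with a 3-byte big-endian length followed by that many bytes of text; the loop over res[:3] builds that 24-bit integer by hand. The same decode, sketched standalone with an invented payload:

import struct

payload = b'\x00\x00\x05hello trailing-bytes'   # illustrative TXT chunk body
length = 0
for ch in bytearray(payload[:3]):   # 3-byte big-endian length prefix
    length = (length << 8) | ch
# Equivalent to zero-padding to 4 bytes and unpacking as an unsigned int
assert length == struct.unpack('>I', b'\x00' + payload[:3])[0]
print(payload[3:3 + length])        # b'hello'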
746
src/calibre/ebooks/djvu/djvubzzdec.py
Normal file
746
src/calibre/ebooks/djvu/djvubzzdec.py
Normal file
@ -0,0 +1,746 @@
|
|||||||
|
#! /usr/bin/env python
# coding: utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'
#__docformat__ = 'restructuredtext en'

# Copyright (C) 2011 Anthon van der Neut, Ruamel bvba
# Adapted from Leon Bottou's djvulibre C++ code,
# ( ZPCodec.{cpp,h} and BSByteStream.{cpp,h} )
# that code was first converted to C removing any dependencies on the DJVU libre
# framework for ByteStream, making it into a ctypes callable shared object
# then to python, and remade into a class
original_copyright_notice = '''
//C- -------------------------------------------------------------------
//C- DjVuLibre-3.5
//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
//C- Copyright (c) 2001 AT&T
//C-
//C- This software is subject to, and may be distributed under, the
//C- GNU General Public License, either Version 2 of the license,
//C- or (at your option) any later version. The license should have
//C- accompanied the software or you may obtain a copy of the license
//C- from the Free Software Foundation at http://www.fsf.org .
//C-
//C- This program is distributed in the hope that it will be useful,
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//C- GNU General Public License for more details.
//C-
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
//C- Lizardtech Software. Lizardtech Software has authorized us to
//C- replace the original DjVu(r) Reference Library notice by the following
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
//C-
//C- ------------------------------------------------------------------
//C- | DjVu (r) Reference Library (v. 3.5)
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
//C- | 6,058,214 and patents pending.
//C- |
//C- | This software is subject to, and may be distributed under, the
//C- | GNU General Public License, either Version 2 of the license,
//C- | or (at your option) any later version. The license should have
//C- | accompanied the software or you may obtain a copy of the license
//C- | from the Free Software Foundation at http://www.fsf.org .
//C- |
//C- | The computer code originally released by LizardTech under this
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
//C- | ORIGINAL CODE."  Subject to any third party intellectual property
//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
//C- | non-exclusive license to make, use, sell, or otherwise dispose of
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
//C- | General Public License.   This grant only confers the right to
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
//C- | the extent such infringement is reasonably necessary to enable
//C- | recipient to make, have made, practice, sell, or otherwise dispose
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
//C- | any greater extent that may be necessary to utilize further
//C- | modifications or combinations.
//C- |
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
//C- +------------------------------------------------------------------
//
// $Id: BSByteStream.cpp,v 1.9 2007/03/25 20:48:29 leonb Exp $
// $Name: release_3_5_23 $
'''


MAXBLOCK = 4096
FREQMAX = 4
CTXIDS = 3
MAXLEN = 1024 ** 2


# Exception classes used by this module.
class BZZDecoderError(Exception):
    """This exception is raised when BZZDecode runs into trouble
    """
    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        return "BZZDecoderError: %s" % (self.msg)


# This table has been designed for the ZPCoder
# * by running the following command in file 'zptable.sn':
# * (fast-crude (steady-mat 0.0035  0.0002) 260)))
default_ztable = [  # {{{
    (0x8000, 0x0000, 84, 145),    # 000: p=0.500000 (    0,    0)
    (0x8000, 0x0000, 3, 4),       # 001: p=0.500000 (    0,    0)
    (0x8000, 0x0000, 4, 3),       # 002: p=0.500000 (    0,    0)
    (0x6bbd, 0x10a5, 5, 1),       # 003: p=0.465226 (    0,    0)
    (0x6bbd, 0x10a5, 6, 2),       # 004: p=0.465226 (    0,    0)
    (0x5d45, 0x1f28, 7, 3),       # 005: p=0.430708 (    0,    0)
    (0x5d45, 0x1f28, 8, 4),       # 006: p=0.430708 (    0,    0)
    (0x51b9, 0x2bd3, 9, 5),       # 007: p=0.396718 (    0,    0)
    (0x51b9, 0x2bd3, 10, 6),      # 008: p=0.396718 (    0,    0)
    (0x4813, 0x36e3, 11, 7),      # 009: p=0.363535 (    0,    0)
    (0x4813, 0x36e3, 12, 8),      # 010: p=0.363535 (    0,    0)
    (0x3fd5, 0x408c, 13, 9),      # 011: p=0.331418 (    0,    0)
    (0x3fd5, 0x408c, 14, 10),     # 012: p=0.331418 (    0,    0)
    (0x38b1, 0x48fd, 15, 11),     # 013: p=0.300585 (    0,    0)
    (0x38b1, 0x48fd, 16, 12),     # 014: p=0.300585 (    0,    0)
    (0x3275, 0x505d, 17, 13),     # 015: p=0.271213 (    0,    0)
    (0x3275, 0x505d, 18, 14),     # 016: p=0.271213 (    0,    0)
    (0x2cfd, 0x56d0, 19, 15),     # 017: p=0.243438 (    0,    0)
    (0x2cfd, 0x56d0, 20, 16),     # 018: p=0.243438 (    0,    0)
    (0x2825, 0x5c71, 21, 17),     # 019: p=0.217391 (    0,    0)
    (0x2825, 0x5c71, 22, 18),     # 020: p=0.217391 (    0,    0)
    (0x23ab, 0x615b, 23, 19),     # 021: p=0.193150 (    0,    0)
    (0x23ab, 0x615b, 24, 20),     # 022: p=0.193150 (    0,    0)
    (0x1f87, 0x65a5, 25, 21),     # 023: p=0.170728 (    0,    0)
    (0x1f87, 0x65a5, 26, 22),     # 024: p=0.170728 (    0,    0)
    (0x1bbb, 0x6962, 27, 23),     # 025: p=0.150158 (    0,    0)
    (0x1bbb, 0x6962, 28, 24),     # 026: p=0.150158 (    0,    0)
    (0x1845, 0x6ca2, 29, 25),     # 027: p=0.131418 (    0,    0)
    (0x1845, 0x6ca2, 30, 26),     # 028: p=0.131418 (    0,    0)
    (0x1523, 0x6f74, 31, 27),     # 029: p=0.114460 (    0,    0)
    (0x1523, 0x6f74, 32, 28),     # 030: p=0.114460 (    0,    0)
    (0x1253, 0x71e6, 33, 29),     # 031: p=0.099230 (    0,    0)
    (0x1253, 0x71e6, 34, 30),     # 032: p=0.099230 (    0,    0)
    (0x0fcf, 0x7404, 35, 31),     # 033: p=0.085611 (    0,    0)
    (0x0fcf, 0x7404, 36, 32),     # 034: p=0.085611 (    0,    0)
    (0x0d95, 0x75d6, 37, 33),     # 035: p=0.073550 (    0,    0)
    (0x0d95, 0x75d6, 38, 34),     # 036: p=0.073550 (    0,    0)
    (0x0b9d, 0x7768, 39, 35),     # 037: p=0.062888 (    0,    0)
    (0x0b9d, 0x7768, 40, 36),     # 038: p=0.062888 (    0,    0)
    (0x09e3, 0x78c2, 41, 37),     # 039: p=0.053539 (    0,    0)
    (0x09e3, 0x78c2, 42, 38),     # 040: p=0.053539 (    0,    0)
    (0x0861, 0x79ea, 43, 39),     # 041: p=0.045365 (    0,    0)
    (0x0861, 0x79ea, 44, 40),     # 042: p=0.045365 (    0,    0)
    (0x0711, 0x7ae7, 45, 41),     # 043: p=0.038272 (    0,    0)
    (0x0711, 0x7ae7, 46, 42),     # 044: p=0.038272 (    0,    0)
    (0x05f1, 0x7bbe, 47, 43),     # 045: p=0.032174 (    0,    0)
    (0x05f1, 0x7bbe, 48, 44),     # 046: p=0.032174 (    0,    0)
    (0x04f9, 0x7c75, 49, 45),     # 047: p=0.026928 (    0,    0)
    (0x04f9, 0x7c75, 50, 46),     # 048: p=0.026928 (    0,    0)
    (0x0425, 0x7d0f, 51, 47),     # 049: p=0.022444 (    0,    0)
    (0x0425, 0x7d0f, 52, 48),     # 050: p=0.022444 (    0,    0)
    (0x0371, 0x7d91, 53, 49),     # 051: p=0.018636 (    0,    0)
    (0x0371, 0x7d91, 54, 50),     # 052: p=0.018636 (    0,    0)
    (0x02d9, 0x7dfe, 55, 51),     # 053: p=0.015421 (    0,    0)
    (0x02d9, 0x7dfe, 56, 52),     # 054: p=0.015421 (    0,    0)
    (0x0259, 0x7e5a, 57, 53),     # 055: p=0.012713 (    0,    0)
    (0x0259, 0x7e5a, 58, 54),     # 056: p=0.012713 (    0,    0)
    (0x01ed, 0x7ea6, 59, 55),     # 057: p=0.010419 (    0,    0)
    (0x01ed, 0x7ea6, 60, 56),     # 058: p=0.010419 (    0,    0)
    (0x0193, 0x7ee6, 61, 57),     # 059: p=0.008525 (    0,    0)
    (0x0193, 0x7ee6, 62, 58),     # 060: p=0.008525 (    0,    0)
    (0x0149, 0x7f1a, 63, 59),     # 061: p=0.006959 (    0,    0)
    (0x0149, 0x7f1a, 64, 60),     # 062: p=0.006959 (    0,    0)
    (0x010b, 0x7f45, 65, 61),     # 063: p=0.005648 (    0,    0)
    (0x010b, 0x7f45, 66, 62),     # 064: p=0.005648 (    0,    0)
    (0x00d5, 0x7f6b, 67, 63),     # 065: p=0.004506 (    0,    0)
    (0x00d5, 0x7f6b, 68, 64),     # 066: p=0.004506 (    0,    0)
    (0x00a5, 0x7f8d, 69, 65),     # 067: p=0.003480 (    0,    0)
    (0x00a5, 0x7f8d, 70, 66),     # 068: p=0.003480 (    0,    0)
    (0x007b, 0x7faa, 71, 67),     # 069: p=0.002602 (    0,    0)
    (0x007b, 0x7faa, 72, 68),     # 070: p=0.002602 (    0,    0)
    (0x0057, 0x7fc3, 73, 69),     # 071: p=0.001843 (    0,    0)
    (0x0057, 0x7fc3, 74, 70),     # 072: p=0.001843 (    0,    0)
    (0x003b, 0x7fd7, 75, 71),     # 073: p=0.001248 (    0,    0)
    (0x003b, 0x7fd7, 76, 72),     # 074: p=0.001248 (    0,    0)
    (0x0023, 0x7fe7, 77, 73),     # 075: p=0.000749 (    0,    0)
    (0x0023, 0x7fe7, 78, 74),     # 076: p=0.000749 (    0,    0)
    (0x0013, 0x7ff2, 79, 75),     # 077: p=0.000402 (    0,    0)
    (0x0013, 0x7ff2, 80, 76),     # 078: p=0.000402 (    0,    0)
    (0x0007, 0x7ffa, 81, 77),     # 079: p=0.000153 (    0,    0)
    (0x0007, 0x7ffa, 82, 78),     # 080: p=0.000153 (    0,    0)
    (0x0001, 0x7fff, 81, 79),     # 081: p=0.000027 (    0,    0)
    (0x0001, 0x7fff, 82, 80),     # 082: p=0.000027 (    0,    0)
    (0x5695, 0x0000, 9, 85),      # 083: p=0.411764 (    2,    3)
    (0x24ee, 0x0000, 86, 226),    # 084: p=0.199988 (    1,    0)
    (0x8000, 0x0000, 5, 6),       # 085: p=0.500000 (    3,    3)
    (0x0d30, 0x0000, 88, 176),    # 086: p=0.071422 (    4,    0)
    (0x481a, 0x0000, 89, 143),    # 087: p=0.363634 (    1,    2)
    (0x0481, 0x0000, 90, 138),    # 088: p=0.024388 (   13,    0)
    (0x3579, 0x0000, 91, 141),    # 089: p=0.285711 (    1,    3)
    (0x017a, 0x0000, 92, 112),    # 090: p=0.007999 (   41,    0)
    (0x24ef, 0x0000, 93, 135),    # 091: p=0.199997 (    1,    5)
    (0x007b, 0x0000, 94, 104),    # 092: p=0.002611 (  127,    0)
    (0x1978, 0x0000, 95, 133),    # 093: p=0.137929 (    1,    8)
    (0x0028, 0x0000, 96, 100),    # 094: p=0.000849 (  392,    0)
    (0x10ca, 0x0000, 97, 129),    # 095: p=0.090907 (    1,   13)
    (0x000d, 0x0000, 82, 98),     # 096: p=0.000276 ( 1208,    0)
    (0x0b5d, 0x0000, 99, 127),    # 097: p=0.061537 (    1,   20)
    (0x0034, 0x0000, 76, 72),     # 098: p=0.001102 ( 1208,    1)
    (0x078a, 0x0000, 101, 125),   # 099: p=0.040815 (    1,   31)
    (0x00a0, 0x0000, 70, 102),    # 100: p=0.003387 (  392,    1)
    (0x050f, 0x0000, 103, 123),   # 101: p=0.027397 (    1,   47)
    (0x0117, 0x0000, 66, 60),     # 102: p=0.005912 (  392,    2)
    (0x0358, 0x0000, 105, 121),   # 103: p=0.018099 (    1,   72)
    (0x01ea, 0x0000, 106, 110),   # 104: p=0.010362 (  127,    1)
    (0x0234, 0x0000, 107, 119),   # 105: p=0.011940 (    1,  110)
    (0x0144, 0x0000, 66, 108),    # 106: p=0.006849 (  193,    1)
    (0x0173, 0x0000, 109, 117),   # 107: p=0.007858 (    1,  168)
    (0x0234, 0x0000, 60, 54),     # 108: p=0.011925 (  193,    2)
    (0x00f5, 0x0000, 111, 115),   # 109: p=0.005175 (    1,  256)
    (0x0353, 0x0000, 56, 48),     # 110: p=0.017995 (  127,    2)
    (0x00a1, 0x0000, 69, 113),    # 111: p=0.003413 (    1,  389)
    (0x05c5, 0x0000, 114, 134),   # 112: p=0.031249 (   41,    1)
    (0x011a, 0x0000, 65, 59),     # 113: p=0.005957 (    2,  389)
    (0x03cf, 0x0000, 116, 132),   # 114: p=0.020618 (   63,    1)
    (0x01aa, 0x0000, 61, 55),     # 115: p=0.009020 (    2,  256)
    (0x0285, 0x0000, 118, 130),   # 116: p=0.013652 (   96,    1)
    (0x0286, 0x0000, 57, 51),     # 117: p=0.013672 (    2,  168)
    (0x01ab, 0x0000, 120, 128),   # 118: p=0.009029 (  146,    1)
    (0x03d3, 0x0000, 53, 47),     # 119: p=0.020710 (    2,  110)
    (0x011a, 0x0000, 122, 126),   # 120: p=0.005961 (  222,    1)
    (0x05c5, 0x0000, 49, 41),     # 121: p=0.031250 (    2,   72)
    (0x00ba, 0x0000, 124, 62),    # 122: p=0.003925 (  338,    1)
    (0x08ad, 0x0000, 43, 37),     # 123: p=0.046979 (    2,   47)
    (0x007a, 0x0000, 72, 66),     # 124: p=0.002586 (  514,    1)
    (0x0ccc, 0x0000, 39, 31),     # 125: p=0.069306 (    2,   31)
    (0x01eb, 0x0000, 60, 54),     # 126: p=0.010386 (  222,    2)
    (0x1302, 0x0000, 33, 25),     # 127: p=0.102940 (    2,   20)
    (0x02e6, 0x0000, 56, 50),     # 128: p=0.015695 (  146,    2)
    (0x1b81, 0x0000, 29, 131),    # 129: p=0.148935 (    2,   13)
    (0x045e, 0x0000, 52, 46),     # 130: p=0.023648 (   96,    2)
    (0x24ef, 0x0000, 23, 17),     # 131: p=0.199999 (    3,   13)
    (0x0690, 0x0000, 48, 40),     # 132: p=0.035533 (   63,    2)
    (0x2865, 0x0000, 23, 15),     # 133: p=0.218748 (    2,    8)
    (0x09de, 0x0000, 42, 136),    # 134: p=0.053434 (   41,    2)
    (0x3987, 0x0000, 137, 7),     # 135: p=0.304346 (    2,    5)
    (0x0dc8, 0x0000, 38, 32),     # 136: p=0.074626 (   41,    3)
    (0x2c99, 0x0000, 21, 139),    # 137: p=0.241378 (    2,    7)
    (0x10ca, 0x0000, 140, 172),   # 138: p=0.090907 (   13,    1)
    (0x3b5f, 0x0000, 15, 9),      # 139: p=0.312499 (    3,    7)
    (0x0b5d, 0x0000, 142, 170),   # 140: p=0.061537 (   20,    1)
    (0x5695, 0x0000, 9, 85),      # 141: p=0.411764 (    2,    3)
    (0x078a, 0x0000, 144, 168),   # 142: p=0.040815 (   31,    1)
    (0x8000, 0x0000, 141, 248),   # 143: p=0.500000 (    2,    2)
    (0x050f, 0x0000, 146, 166),   # 144: p=0.027397 (   47,    1)
    (0x24ee, 0x0000, 147, 247),   # 145: p=0.199988 (    0,    1)
    (0x0358, 0x0000, 148, 164),   # 146: p=0.018099 (   72,    1)
    (0x0d30, 0x0000, 149, 197),   # 147: p=0.071422 (    0,    4)
    (0x0234, 0x0000, 150, 162),   # 148: p=0.011940 (  110,    1)
    (0x0481, 0x0000, 151, 95),    # 149: p=0.024388 (    0,   13)
    (0x0173, 0x0000, 152, 160),   # 150: p=0.007858 (  168,    1)
    (0x017a, 0x0000, 153, 173),   # 151: p=0.007999 (    0,   41)
    (0x00f5, 0x0000, 154, 158),   # 152: p=0.005175 (  256,    1)
    (0x007b, 0x0000, 155, 165),   # 153: p=0.002611 (    0,  127)
    (0x00a1, 0x0000, 70, 156),    # 154: p=0.003413 (  389,    1)
    (0x0028, 0x0000, 157, 161),   # 155: p=0.000849 (    0,  392)
    (0x011a, 0x0000, 66, 60),     # 156: p=0.005957 (  389,    2)
    (0x000d, 0x0000, 81, 159),    # 157: p=0.000276 (    0, 1208)
    (0x01aa, 0x0000, 62, 56),     # 158: p=0.009020 (  256,    2)
    (0x0034, 0x0000, 75, 71),     # 159: p=0.001102 (    1, 1208)
    (0x0286, 0x0000, 58, 52),     # 160: p=0.013672 (  168,    2)
    (0x00a0, 0x0000, 69, 163),    # 161: p=0.003387 (    1,  392)
    (0x03d3, 0x0000, 54, 48),     # 162: p=0.020710 (  110,    2)
    (0x0117, 0x0000, 65, 59),     # 163: p=0.005912 (    2,  392)
    (0x05c5, 0x0000, 50, 42),     # 164: p=0.031250 (   72,    2)
    (0x01ea, 0x0000, 167, 171),   # 165: p=0.010362 (    1,  127)
    (0x08ad, 0x0000, 44, 38),     # 166: p=0.046979 (   47,    2)
    (0x0144, 0x0000, 65, 169),    # 167: p=0.006849 (    1,  193)
    (0x0ccc, 0x0000, 40, 32),     # 168: p=0.069306 (   31,    2)
    (0x0234, 0x0000, 59, 53),     # 169: p=0.011925 (    2,  193)
    (0x1302, 0x0000, 34, 26),     # 170: p=0.102940 (   20,    2)
    (0x0353, 0x0000, 55, 47),     # 171: p=0.017995 (    2,  127)
    (0x1b81, 0x0000, 30, 174),    # 172: p=0.148935 (   13,    2)
    (0x05c5, 0x0000, 175, 193),   # 173: p=0.031249 (    1,   41)
    (0x24ef, 0x0000, 24, 18),     # 174: p=0.199999 (   13,    3)
    (0x03cf, 0x0000, 177, 191),   # 175: p=0.020618 (    1,   63)
    (0x2b74, 0x0000, 178, 222),   # 176: p=0.235291 (    4,    1)
    (0x0285, 0x0000, 179, 189),   # 177: p=0.013652 (    1,   96)
    (0x201d, 0x0000, 180, 218),   # 178: p=0.173910 (    6,    1)
    (0x01ab, 0x0000, 181, 187),   # 179: p=0.009029 (    1,  146)
    (0x1715, 0x0000, 182, 216),   # 180: p=0.124998 (    9,    1)
    (0x011a, 0x0000, 183, 185),   # 181: p=0.005961 (    1,  222)
    (0x0fb7, 0x0000, 184, 214),   # 182: p=0.085105 (   14,    1)
    (0x00ba, 0x0000, 69, 61),     # 183: p=0.003925 (    1,  338)
    (0x0a67, 0x0000, 186, 212),   # 184: p=0.056337 (   22,    1)
    (0x01eb, 0x0000, 59, 53),     # 185: p=0.010386 (    2,  222)
    (0x06e7, 0x0000, 188, 210),   # 186: p=0.037382 (   34,    1)
    (0x02e6, 0x0000, 55, 49),     # 187: p=0.015695 (    2,  146)
    (0x0496, 0x0000, 190, 208),   # 188: p=0.024844 (   52,    1)
    (0x045e, 0x0000, 51, 45),     # 189: p=0.023648 (    2,   96)
    (0x030d, 0x0000, 192, 206),   # 190: p=0.016529 (   79,    1)
    (0x0690, 0x0000, 47, 39),     # 191: p=0.035533 (    2,   63)
    (0x0206, 0x0000, 194, 204),   # 192: p=0.010959 (  120,    1)
    (0x09de, 0x0000, 41, 195),    # 193: p=0.053434 (    2,   41)
    (0x0155, 0x0000, 196, 202),   # 194: p=0.007220 (  183,    1)
    (0x0dc8, 0x0000, 37, 31),     # 195: p=0.074626 (    3,   41)
    (0x00e1, 0x0000, 198, 200),   # 196: p=0.004750 (  279,    1)
    (0x2b74, 0x0000, 199, 243),   # 197: p=0.235291 (    1,    4)
    (0x0094, 0x0000, 72, 64),     # 198: p=0.003132 (  424,    1)
    (0x201d, 0x0000, 201, 239),   # 199: p=0.173910 (    1,    6)
    (0x0188, 0x0000, 62, 56),     # 200: p=0.008284 (  279,    2)
    (0x1715, 0x0000, 203, 237),   # 201: p=0.124998 (    1,    9)
    (0x0252, 0x0000, 58, 52),     # 202: p=0.012567 (  183,    2)
    (0x0fb7, 0x0000, 205, 235),   # 203: p=0.085105 (    1,   14)
    (0x0383, 0x0000, 54, 48),     # 204: p=0.019021 (  120,    2)
    (0x0a67, 0x0000, 207, 233),   # 205: p=0.056337 (    1,   22)
    (0x0547, 0x0000, 50, 44),     # 206: p=0.028571 (   79,    2)
    (0x06e7, 0x0000, 209, 231),   # 207: p=0.037382 (    1,   34)
    (0x07e2, 0x0000, 46, 38),     # 208: p=0.042682 (   52,    2)
    (0x0496, 0x0000, 211, 229),   # 209: p=0.024844 (    1,   52)
    (0x0bc0, 0x0000, 40, 34),     # 210: p=0.063636 (   34,    2)
    (0x030d, 0x0000, 213, 227),   # 211: p=0.016529 (    1,   79)
    (0x1178, 0x0000, 36, 28),     # 212: p=0.094593 (   22,    2)
    (0x0206, 0x0000, 215, 225),   # 213: p=0.010959 (    1,  120)
    (0x19da, 0x0000, 30, 22),     # 214: p=0.139999 (   14,    2)
    (0x0155, 0x0000, 217, 223),   # 215: p=0.007220 (    1,  183)
    (0x24ef, 0x0000, 26, 16),     # 216: p=0.199998 (    9,    2)
    (0x00e1, 0x0000, 219, 221),   # 217: p=0.004750 (    1,  279)
    (0x320e, 0x0000, 20, 220),    # 218: p=0.269229 (    6,    2)
    (0x0094, 0x0000, 71, 63),     # 219: p=0.003132 (    1,  424)
    (0x432a, 0x0000, 14, 8),      # 220: p=0.344827 (    6,    3)
    (0x0188, 0x0000, 61, 55),     # 221: p=0.008284 (    2,  279)
    (0x447d, 0x0000, 14, 224),    # 222: p=0.349998 (    4,    2)
    (0x0252, 0x0000, 57, 51),     # 223: p=0.012567 (    2,  183)
    (0x5ece, 0x0000, 8, 2),       # 224: p=0.434782 (    4,    3)
    (0x0383, 0x0000, 53, 47),     # 225: p=0.019021 (    2,  120)
    (0x8000, 0x0000, 228, 87),    # 226: p=0.500000 (    1,    1)
    (0x0547, 0x0000, 49, 43),     # 227: p=0.028571 (    2,   79)
    (0x481a, 0x0000, 230, 246),   # 228: p=0.363634 (    2,    1)
    (0x07e2, 0x0000, 45, 37),     # 229: p=0.042682 (    2,   52)
    (0x3579, 0x0000, 232, 244),   # 230: p=0.285711 (    3,    1)
    (0x0bc0, 0x0000, 39, 33),     # 231: p=0.063636 (    2,   34)
    (0x24ef, 0x0000, 234, 238),   # 232: p=0.199997 (    5,    1)
    (0x1178, 0x0000, 35, 27),     # 233: p=0.094593 (    2,   22)
    (0x1978, 0x0000, 138, 236),   # 234: p=0.137929 (    8,    1)
    (0x19da, 0x0000, 29, 21),     # 235: p=0.139999 (    2,   14)
    (0x2865, 0x0000, 24, 16),     # 236: p=0.218748 (    8,    2)
    (0x24ef, 0x0000, 25, 15),     # 237: p=0.199998 (    2,    9)
    (0x3987, 0x0000, 240, 8),     # 238: p=0.304346 (    5,    2)
    (0x320e, 0x0000, 19, 241),    # 239: p=0.269229 (    2,    6)
    (0x2c99, 0x0000, 22, 242),    # 240: p=0.241378 (    7,    2)
    (0x432a, 0x0000, 13, 7),      # 241: p=0.344827 (    3,    6)
    (0x3b5f, 0x0000, 16, 10),     # 242: p=0.312499 (    7,    3)
    (0x447d, 0x0000, 13, 245),    # 243: p=0.349998 (    2,    4)
    (0x5695, 0x0000, 10, 2),      # 244: p=0.411764 (    3,    2)
    (0x5ece, 0x0000, 7, 1),       # 245: p=0.434782 (    3,    4)
    (0x8000, 0x0000, 244, 83),    # 246: p=0.500000 (    2,    2)
    (0x8000, 0x0000, 249, 250),   # 247: p=0.500000 (    1,    1)
    (0x5695, 0x0000, 10, 2),      # 248: p=0.411764 (    3,    2)
    (0x481a, 0x0000, 89, 143),    # 249: p=0.363634 (    1,    2)
    (0x481a, 0x0000, 230, 246),   # 250: p=0.363634 (    2,    1)
    (0, 0, 0, 0),
    (0, 0, 0, 0),
    (0, 0, 0, 0),
    (0, 0, 0, 0),
    (0, 0, 0, 0),
]


xmtf = (
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
    )
# }}}
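# Editorial aside, not part of the commit: in each default_ztable row the
# four columns are the probability increment p (added to the interval size
# in zpcodec_decode below), the MPS-adaptation threshold m, and the 'up'/'dn'
# successor states followed after an MPS/LPS adaptation event in decode_sub.
# A quick property check, since every transition must itself land on a
# table row:
for _i, (_p, _m, _up, _dn) in enumerate(default_ztable):
    assert 0 <= _up < len(default_ztable), (_i, _up)
    assert 0 <= _dn < len(default_ztable), (_i, _dn)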


def chr3(l):
    return bytes(bytearray(l))


class BZZDecoder():
    def __init__(self, infile, outfile):
        self.instream = infile
        self.outf = outfile
        self.ieof = False
        self.bptr = None
        self.xsize = None
        self.outbuf = [0] * (MAXBLOCK * 1024)
        self.byte = None
        self.scount = 0
        self.delay = 25
        self.a = 0
        self.code = 0
        self.bufint = 0
        self.ctx = [0] * 300
        # table
        self.p = [0] * 256
        self.m = [0] * 256
        self.up = [0] * 256
        self.dn = [0] * 256
        # machine independent ffz
        self.ffzt = [0] * 256

        # Create machine independent ffz table
        for i in range(256):
            j = i
            while(j & 0x80):
                self.ffzt[i] += 1
                j <<= 1
        # Initialize table
        self.newtable(default_ztable)
        # Codebit counter
        # Read first 16 bits of code
        if not self.read_byte():
            self.byte = 0xff
        self.code = (self.byte << 8)
        if not self.read_byte():
            self.byte = 0xff
        self.code = self.code | self.byte
        # Preload buffer
        self.preload()
        # Compute initial fence
        self.fence = self.code
        if self.code >= 0x8000:
            self.fence = 0x7fff

    def convert(self, sz):
        if self.ieof:
            return 0
        copied = 0
        while sz > 0 and not (self.ieof):
            # Decode if needed
            if not self.xsize:
                self.bptr = 0
                if not self.decode(): # input block size set in decode
                    self.xsize = 1
                    self.ieof = True
                self.xsize -= 1

            # Compute remaining (renamed from 'bytes' so the builtin is not shadowed)
            nbytes = self.xsize
            if nbytes > sz:
                nbytes = sz
            # Transfer: pass a list slice so chr3() builds a proper byte
            # string (passing a bare int would make bytearray() allocate a
            # zero-filled buffer of that length instead)
            if nbytes:
                self.outf.write(chr3(self.outbuf[self.bptr:self.bptr + nbytes]))
            self.xsize -= nbytes
            self.bptr += nbytes
            sz -= nbytes
            copied += nbytes
            # offset += nbytes; // for tell()
        return copied

    def preload(self):
        while self.scount <= 24:
            if self.read_byte() < 1:
                self.byte = 0xff
                # C's `if (--delay < 1)`: `--x` is a no-op double negation
                # in python, so decrement explicitly
                self.delay -= 1
                if self.delay < 1:
                    raise BZZDecoderError("BiteStream EOF")
            self.bufint = (self.bufint << 8) | self.byte
            self.scount += 8

    def newtable(self, table):
        for i in range(256):
            self.p[i] = table[i][0]
            self.m[i] = table[i][1]
            self.up[i] = table[i][2]
            self.dn[i] = table[i][3]
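    # Editorial overview (not part of the commit): decode() inverts the
    # BSByteStream stages in order. It ZP-decodes the 24-bit block size and
    # up to two "estimation speed" bits, then the quasi-MTF symbol stream,
    # and finally undoes the blocksort (Burrows-Wheeler style) transform in
    # place in outbuf.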
    def decode(self):
        outbuf = self.outbuf
        # Decode block size
        self.xsize = self.decode_raw(24)
        if not self.xsize:
            return 0
        if self.xsize > MAXBLOCK * 1024: # 4MB (4096 * 1024) is max block
            raise BZZDecoderError("BiteStream.corrupt")
        # Decode Estimation Speed
        fshift = 0
        if self.zpcodec_decoder():
            fshift += 1
            if self.zpcodec_decoder():
                fshift += 1
        # Prepare Quasi MTF
        mtf = list(xmtf) # unsigned chars
        freq = [0] * FREQMAX
        fadd = 4
        # Decode
        mtfno = 3
        markerpos = -1
        for i in range(self.xsize):
            ctxid = CTXIDS - 1
            if ctxid > mtfno:
                ctxid = mtfno
            cx = self.ctx
            if self.zpcodec_decode(cx, ctxid):
                mtfno = 0
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, ctxid + CTXIDS):
                mtfno = 1
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS):
                mtfno = 2 + self.decode_binary(cx, 2*CTXIDS + 1, 1)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 2):
                mtfno = 4 + self.decode_binary(cx, 2*CTXIDS + 2 + 1, 2)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 6):
                mtfno = 8 + self.decode_binary(cx, 2*CTXIDS + 6 + 1, 3)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 14):
                mtfno = 16 + self.decode_binary(cx, 2*CTXIDS + 14 + 1, 4)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 30):
                mtfno = 32 + self.decode_binary(cx, 2*CTXIDS + 30 + 1, 5)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 62):
                mtfno = 64 + self.decode_binary(cx, 2*CTXIDS + 62 + 1, 6)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 126):
                mtfno = 128 + self.decode_binary(cx, 2*CTXIDS + 126 + 1, 7)
                outbuf[i] = mtf[mtfno]
            else:
                mtfno = 256 # EOB
                outbuf[i] = 0
                markerpos = i
                continue

            # Rotate mtf according to empirical frequencies (new!)
            # :rotate label
            # Adjust frequencies for overflow
            fadd = fadd + (fadd >> fshift)
            if fadd > 0x10000000:
                fadd >>= 24
                freq[0] >>= 24
                freq[1] >>= 24
                freq[2] >>= 24
                freq[3] >>= 24
                for k in range(4, FREQMAX):
                    freq[k] = freq[k] >> 24
            # Relocate new char according to new freq
            fc = fadd
            if mtfno < FREQMAX:
                fc += freq[mtfno]
            k = mtfno
            while (k >= FREQMAX):
                mtf[k] = mtf[k - 1]
                k -= 1
            while (k > 0 and fc >= freq[k - 1]):
                mtf[k] = mtf[k - 1]
                freq[k] = freq[k - 1]
                k -= 1
            mtf[k] = outbuf[i]
            freq[k] = fc
        #///////////////////////////////
        #//////// Reconstruct the string

        if markerpos < 1 or markerpos >= self.xsize:
            raise BZZDecoderError("BiteStream.corrupt")
        # Allocate pointers
        posn = [0] * self.xsize
        # Prepare count buffer
        count = [0] * 256
        # Fill count buffer
        for i in range(markerpos):
            c = outbuf[i]
            posn[i] = (c << 24) | (count[c] & 0xffffff)
            count[c] += 1
        for i in range(markerpos + 1, self.xsize):
            c = outbuf[i]
            posn[i] = (c << 24) | (count[c] & 0xffffff)
            count[c] += 1
        # Compute sorted char positions
        last = 1
        for i in range(256):
            tmp = count[i]
            count[i] = last
            last += tmp
        # Undo the sort transform
        i = 0
        last = self.xsize - 1
        while last > 0:
            n = posn[i]
            c = (posn[i] >> 24)
            last -= 1
            outbuf[last] = c
            i = count[c] + (n & 0xffffff)
        # Free and check
        if i != markerpos:
            raise BZZDecoderError("BiteStream.corrupt")
        return self.xsize

    def decode_raw(self, bits):
        n = 1
        m = (1 << bits)
        while n < m:
            b = self.zpcodec_decoder()
            n = (n << 1) | b
        return n - m
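    # Editorial note (not in the commit): decode_raw(bits) reads a plain
    # integer MSB-first. n starts at 1 and shifts each decoded bit in, so
    # after `bits` rounds n == (1 << bits) + value, and n - m strips the
    # sentinel bit. E.g. for bits 1,0,1: n goes 1, 0b11, 0b110, 0b1101, and
    # 0b1101 - 0b1000 == 5. decode_binary() below is the same walk, except
    # each node of the implicit binary tree gets its own adaptive context.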
    def decode_binary(self, ctx, index, bits):
        n = 1
        m = (1 << bits)
        while n < m:
            b = self.zpcodec_decode(ctx, index + n - 1)
            n = (n << 1) | b
        return n - m

    def zpcodec_decoder(self):
        return self.decode_sub_simple(0, 0x8000 + (self.a >> 1))

    def decode_sub_simple(self, mps, z):
        # Test MPS/LPS
        if z > self.code:
            # LPS branch
            z = 0x10000 - z
            self.a += z
            self.code = self.code + z
            # LPS renormalization
            shift = self.ffz()
            self.scount -= shift
            self.a = self.a << shift
            self.a &= 0xffff
            self.code = (self.code << shift) | ((self.bufint >> self.scount) & ((1 << shift) - 1))
            self.code &= 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            result = mps ^ 1
        else:
            # MPS renormalization
            self.scount -= 1
            self.a = (z << 1) & 0xffff
            self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1))
            self.code &= 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            result = mps
        return result

    def decode_sub(self, ctx, index, z):
        # Save bit
        bit = (ctx[index] & 1)
        # Avoid interval reversion
        d = 0x6000 + ((z + self.a) >> 2)
        if z > d:
            z = d
        # Test MPS/LPS
        if z > self.code:
            # LPS branch
            z = 0x10000 - z
            self.a += z
            self.code = self.code + z
            # LPS adaptation
            ctx[index] = self.dn[ctx[index]]
            # LPS renormalization
            shift = self.ffz()
            self.scount -= shift
            self.a = (self.a << shift) & 0xffff
            self.code = ((self.code << shift) | ((self.bufint >> self.scount) & ((1 << shift) - 1))) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            return bit ^ 1
        else:
            # MPS adaptation
            if self.a >= self.m[ctx[index]]:
                ctx[index] = self.up[ctx[index]]
            # MPS renormalization
            self.scount -= 1
            self.a = (z << 1) & 0xffff
            self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1)) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            return bit

    def zpcodec_decode(self, ctx, index):
        z = self.a + self.p[ctx[index]]
        if z <= self.fence:
            self.a = z
            res = (ctx[index] & 1)
        else:
            res = self.decode_sub(ctx, index, z)
        return res

    def read_byte(self):
        res = 0
        if self.instream:
            ires = self.instream.read(1)
            res = len(ires)
            if res:
                self.byte = ord(ires[0])
        else:
            raise NotImplementedError
        return res

    def ffz(self):
        x = self.a
        if (x >= 0xff00):
            return (self.ffzt[x & 0xff] + 8)
        else:
            return (self.ffzt[(x >> 8) & 0xff])


### for testing
def main():
    import sys
    infile = file(sys.argv[1], "rb")
    outfile = file(sys.argv[2], "wb")
    dec = BZZDecoder(infile, outfile)
    while True:
        res = dec.convert(1024 * 1024)
        if not res:
            break


if __name__ == "__main__":
    main()
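An editorial aside before the next file (not part of the commit): the decoder above is self-contained, so a raw BZZ stream can be unpacked without the rest of the DJVU machinery, mirroring the test main() above. The file names here are placeholders:

    from calibre.ebooks.djvu.djvubzzdec import BZZDecoder

    with open('chunk.bzz', 'rb') as inf, open('chunk.raw', 'wb') as outf:
        dec = BZZDecoder(inf, outf)      # the constructor eagerly reads the 16-bit ZP seed
        while dec.convert(1024 * 1024):  # returns 0 once the last block is written
            pass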
87
src/calibre/ebooks/djvu/input.py
Normal file
@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, Anthon van der Neut <anthon@mnt.org>'
__docformat__ = 'restructuredtext en'

import os
from subprocess import Popen, PIPE
from cStringIO import StringIO

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.txt.processor import convert_basic


class DJVUInput(InputFormatPlugin):

    name = 'DJVU Input'
    author = 'Anthon van der Neut'
    description = 'Convert OCR-ed DJVU files (.djvu) to HTML'
    file_types = set(['djvu', 'djv'])

    options = set([
        OptionRecommendation(name='use_djvutxt', recommended_value=True,
            help=_('Try to use the djvutxt program and fall back to pure '
                   'python implementation if it fails or is not available')),
    ])

    def convert(self, stream, options, file_ext, log, accelerators):
        stdout = StringIO()
        ppdjvu = True
        # using djvutxt is MUCH faster, should make it an option
        if options.use_djvutxt and os.path.exists('/usr/bin/djvutxt'):
            from calibre.ptempfile import PersistentTemporaryFile
            try:
                fp = PersistentTemporaryFile(suffix='.djvu', prefix='djv_input')
                filename = fp._name
                fp.write(stream.read())
                fp.close()
                cmd = ['djvutxt', filename]
                stdout.write(Popen(cmd, stdout=PIPE, close_fds=True).communicate()[0])
                os.remove(filename)
                ppdjvu = False
            except:
                stream.seek(0) # retry with the pure python converter
        if ppdjvu:
            from .djvu import DJVUFile
            x = DJVUFile(stream)
            x.get_text(stdout)

        html = convert_basic(stdout.getvalue().replace(b"\n", b' ').replace(
            b'\037', b'\n\n'))
        # Run the HTMLized text through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        base = os.getcwdu()
        if file_ext != 'txtz' and hasattr(stream, 'name'):
            base = os.path.dirname(stream.name)
        fname = os.path.join(base, 'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            fname = 'index%d.html'%c
        htmlfile = open(fname, 'wb')
        with htmlfile:
            htmlfile.write(html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion.
        with open(htmlfile.name, 'rb') as f:
            oeb = html_input.convert(f, options, 'html', log,
                    {})
        options.debug_pipeline = odi
        os.remove(htmlfile.name)

        # Set metadata from file.
        from calibre.customize.ui import get_file_type_metadata
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        return oeb
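An editorial aside, not part of the diff: assuming the commit also registers DJVUInput in calibre's builtin plugin list, the same lookup the plugin itself uses for the HTML stage a few lines above should then resolve the new input format, along these lines:

    from calibre.customize.ui import plugin_for_input_format

    djvu_input = plugin_for_input_format('djvu')
    print(djvu_input.name)  # expected: 'DJVU Input'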
@ -246,6 +246,7 @@ class CSSFlattener(object):
                     cssdict['font-size'] = '%.1fpt'%font_size
                 del node.attrib['size']
             if 'face' in node.attrib:
+                cssdict['font-family'] = node.attrib['face']
                 del node.attrib['face']
             if 'color' in node.attrib:
                 cssdict['color'] = node.attrib['color']
@ -397,6 +397,7 @@ class AddAction(InterfaceAction):
             d = error_dialog(self.gui, _('Add to library'), _('No book files found'))
             d.exec_()
             return
+        paths = self.gui.device_manager.device.prepare_addable_books(paths)
         from calibre.gui2.add import Adder
         self.__adder_func = partial(self._add_from_device_adder, on_card=None,
                                     model=view.model())
24
src/calibre/gui2/convert/djvu_input.py
Normal file
@ -0,0 +1,24 @@
# coding: utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'


from calibre.gui2.convert.djvu_input_ui import Ui_Form
from calibre.gui2.convert import Widget


class PluginWidget(Widget, Ui_Form):

    TITLE = _('DJVU Input')
    HELP = _('Options specific to')+' DJVU '+_('input')
    COMMIT_NAME = 'djvu_input'
    ICON = I('mimetypes/djvu.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
                ['use_djvutxt', ])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
28
src/calibre/gui2/convert/djvu_input.ui
Normal file
@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
 <class>Form</class>
 <widget class="QWidget" name="Form">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>400</width>
    <height>300</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Form</string>
  </property>
  <layout class="QVBoxLayout" name="verticalLayout">
   <item>
    <widget class="QCheckBox" name="opt_use_djvutxt">
     <property name="text">
      <string>Use &amp;djvutxt, if available, for faster processing</string>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <resources/>
 <connections/>
</ui>
@ -538,14 +538,20 @@ class CoversModel(QAbstractListModel): # {{{
             current_cover = QPixmap(I('default_cover.png'))

         self.blank = QPixmap(I('blank.png')).scaled(150, 200)
+        self.cc = current_cover
+        self.reset_covers(do_reset=False)
+
-        self.covers = [self.get_item(_('Current cover'), current_cover)]
+    def reset_covers(self, do_reset=True):
+        self.covers = [self.get_item(_('Current cover'), self.cc)]
         self.plugin_map = {}
         for i, plugin in enumerate(metadata_plugins(['cover'])):
             self.covers.append((plugin.name+'\n'+_('Searching...'),
                 QVariant(self.blank), None, True))
             self.plugin_map[plugin] = i+1

+        if do_reset:
+            self.reset()
+
     def get_item(self, src, pmap, waiting=False):
         sz = '%dx%d'%(pmap.width(), pmap.height())
         text = QVariant(src + '\n' + sz)
@ -654,6 +660,9 @@ class CoversView(QListView): # {{{
         self.select(0)
         self.delegate.start_animation()

+    def reset_covers(self):
+        self.m.reset_covers()
+
     def clear_failed(self):
         plugin = self.m.plugin_for_index(self.currentIndex())
         self.m.clear_failed()
@ -683,12 +692,18 @@ class CoversWidget(QWidget): # {{{
         l.addWidget(self.covers_view, 1, 0)
         self.continue_processing = True

+    def reset_covers(self):
+        self.covers_view.reset_covers()
+
     def start(self, book, current_cover, title, authors):
+        self.continue_processing = True
+        self.abort.clear()
         self.book, self.current_cover = book, current_cover
         self.title, self.authors = title, authors
         self.log('Starting cover download for:', book.title)
         self.log('Query:', title, authors, self.book.identifiers)
-        self.msg.setText('<p>'+_('Downloading covers for <b>%s</b>, please wait...')%book.title)
+        self.msg.setText('<p>'+
+                _('Downloading covers for <b>%s</b>, please wait...')%book.title)
         self.covers_view.start()

         self.worker = CoverWorker(self.log, self.abort, self.title,
@ -726,8 +741,9 @@ class CoversWidget(QWidget): # {{{
         if num < 2:
             txt = _('Could not find any covers for <b>%s</b>')%self.book.title
         else:
-            txt = _('Found <b>%(num)d</b> covers of %(title)s. Pick the one you like'
-                    ' best.')%dict(num=num-1, title=self.title)
+            txt = _('Found <b>%(num)d</b> covers of %(title)s. '
+                    'Pick the one you like best.')%dict(num=num-1,
+                            title=self.title)
         self.msg.setText(txt)

         self.finished.emit()
@ -832,10 +848,14 @@ class FullFetch(QDialog): # {{{
         self.next_button.clicked.connect(self.next_clicked)
         self.ok_button = self.bb.button(self.bb.Ok)
         self.ok_button.clicked.connect(self.ok_clicked)
+        self.prev_button = self.bb.addButton(_('Back'), self.bb.ActionRole)
+        self.prev_button.setIcon(QIcon(I('back.png')))
+        self.prev_button.clicked.connect(self.back_clicked)
         self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
         self.log_button.clicked.connect(self.view_log)
         self.log_button.setIcon(QIcon(I('debug.png')))
         self.ok_button.setVisible(False)
+        self.prev_button.setVisible(False)

         self.identify_widget = IdentifyWidget(self.log, self)
         self.identify_widget.rejected.connect(self.reject)
@ -857,12 +877,21 @@ class FullFetch(QDialog): # {{{
     def book_selected(self, book):
         self.next_button.setVisible(False)
         self.ok_button.setVisible(True)
+        self.prev_button.setVisible(True)
         self.book = book
         self.stack.setCurrentIndex(1)
         self.log('\n\n')
         self.covers_widget.start(book, self.current_cover,
                 self.title, self.authors)

+    def back_clicked(self):
+        self.next_button.setVisible(True)
+        self.ok_button.setVisible(False)
+        self.prev_button.setVisible(False)
+        self.stack.setCurrentIndex(0)
+        self.covers_widget.cancel()
+        self.covers_widget.reset_covers()
+
     def accept(self):
         # Prevent the usual dialog accept mechanisms from working
         pass
@ -58,7 +58,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         self.device_to_formats_map = {}
         for device in device_plugins():
             n = device_name_for_plugboards(device)
-            self.device_to_formats_map[n] = set(device.FORMATS)
+            self.device_to_formats_map[n] = set(device.settings().format_map)
             if getattr(device, 'CAN_DO_DEVICE_DB_PLUGBOARD', False):
                 self.device_to_formats_map[n].add('device_db')
             if n not in self.devices:
@ -206,7 +206,7 @@
     <item>
      <widget class="QCheckBox" name="opt_autolaunch_server">
       <property name="text">
-       <string>Run server &amp;automatically on startup</string>
+       <string>Run server &amp;automatically when calibre starts</string>
       </property>
      </widget>
     </item>
@ -37,6 +37,7 @@ class SearchRestrictionMixin(object):
         search = unicode(search)
         if not search:
             self.search_restriction.setCurrentIndex(0)
+            self._apply_search_restriction('')
         else:
             s = '*' + search
             if self.search_restriction.count() > 1:
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import urllib
 from contextlib import closing

 from lxml import html
@ -37,27 +36,16 @@ class AmazonDEKindleStore(StorePlugin):

     def search(self, query, max_results=10, timeout=60):
         search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))

-            # Amazon has two results pages.
-            # 20110725: seems that is_shot is gone.
-            # is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # # Horizontal grid of books.
-            # if is_shot:
-            #     data_xpath = '//div[contains(@class, "result")]'
-            #     format_xpath = './/div[@class="productTitle"]/text()'
-            #     cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # # Vertical list of books.
-            # else:
             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
             cover_xpath = './/img[@class="productImage"]/@src'
-            # end is_shot else

             for data in doc.xpath(data_xpath):
                 if counter <= 0:
@ -80,11 +68,9 @@ class AmazonDEKindleStore(StorePlugin):
                 title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                 price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))

-                # if is_shot:
-                #     author = format.split(' von ')[-1]
-                # else:
                 author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('von ')[-1]
+                if author.startswith('von '):
+                    author = author[4:]

                 counter -= 1
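A worked example of the new query encoding used in these store plugins (an editorial aside; Python 2 string semantics, as in the diff). It hand-rolls latin-1 percent-escapes, which is also why the response body is now decoded as latin-1:

    query = u'b\xf6ll krieg'                       # u'böll krieg'
    q = query.encode('ascii', 'backslashreplace')  # -> 'b\\xf6ll krieg'
    q = q.replace('%', '%25')                      # escape any literal '%' first
    q = q.replace('\\x', '%')                      # '\\xf6' becomes '%f6'
    q = q.replace(' ', '+')                        # -> 'b%f6ll+krieg'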
82
src/calibre/gui2/store/stores/amazon_fr_plugin.py
Normal file
@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult


class AmazonFRKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        aff_id = {'tag': 'charhale-21'}
        store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id

        if detail_item:
            aff_id['asin'] = detail_item
            store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read().decode('latin-1', 'replace'))

            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'

            for data in doc.xpath(data_xpath):
                if counter <= 0:
                    break

                # Even though we are searching digital-text only Amazon will still
                # put in results for non Kindle books (author pages). So we need
                # to explicitly check if the item is a Kindle book and ignore it
                # if it isn't.
                format = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format.lower():
                    continue

                # We must have an asin otherwise we can't easily reference the
                # book later.
                asin = ''.join(data.xpath("@name"))

                cover_url = ''.join(data.xpath(cover_xpath))

                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                if author.startswith('de '):
                    author = author[3:]

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.formats = 'Kindle'
                s.drm = SearchResult.DRM_UNKNOWN

                yield s
@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'

 import random
 import re
-import urllib
 from contextlib import closing

 from lxml import html
@ -122,12 +121,12 @@ class AmazonKindleStore(StorePlugin):
         open_url(QUrl(store_link))
 
     def search(self, query, max_results=10, timeout=60):
-        url = self.search_url + urllib.quote_plus(query)
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()
 
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
 
             # Amazon has two results pages.
             is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
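A brief aside on the second change, as a standalone sketch: latin-1 assigns a character to every possible byte value, so decoding with it can never raise, and lxml then receives unicode instead of having to guess the page encoding itself:

    # Sketch only (Python 2): latin-1 decodes any byte string without errors,
    # one character per byte, so parsing stays deterministic even when the
    # page declares no reliable charset.
    raw = 'r\xe9sultats \xff'            # arbitrary bytes, not valid UTF-8
    text = raw.decode('latin-1', 'replace')
    print(text)                          # u'résultats ÿ', never raises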
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-import urllib
 from contextlib import closing
 
 from lxml import html
@ -34,27 +33,16 @@ class AmazonUKKindleStore(StorePlugin):
 
     def search(self, query, max_results=10, timeout=60):
         search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()
 
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
 
-            # Amazon has two results pages.
-            # 20110725: seems that is_shot is gone.
-            # is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # # Horizontal grid of books.
-            # if is_shot:
-            #     data_xpath = '//div[contains(@class, "result")]'
-            #     format_xpath = './/div[@class="productTitle"]/text()'
-            #     cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # # Vertical list of books.
-            # else:
             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
             cover_xpath = './/img[@class="productImage"]/@src'
-            # end is_shot else
 
             for data in doc.xpath(data_xpath):
                 if counter <= 0:
@ -77,11 +65,9 @@ class AmazonUKKindleStore(StorePlugin):
                 title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                 price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
 
-                # if is_shot:
-                #     author = format.split(' von ')[-1]
-                # else:
                 author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('by ')[-1]
+                if author.startswith('by '):
+                    author = author[3:]
 
                 counter -= 1
 
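A quick illustration (the author string is hypothetical) of the bug the last change fixes: split('by ') matches anywhere in the string, so it mangles authors whose names merely contain that substring, while startswith() only strips a genuine prefix:

    author = 'Toby Young'
    print(author.split('by ')[-1])   # -> 'Young'  (wrong: matched inside 'Toby')

    if author.startswith('by '):     # corrected logic: strip only a leading 'by '
        author = author[3:]
    print(author)                    # -> 'Toby Young'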
@ -47,6 +47,9 @@ def get_parser(usage):
 def get_db(dbpath, options):
     if options.library_path is not None:
         dbpath = options.library_path
+    if dbpath is None:
+        raise ValueError('No saved library path, either run the GUI or use the'
+                ' --with-library option')
     dbpath = os.path.abspath(dbpath)
     return LibraryDatabase2(dbpath)
 
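A short standalone sketch (not calibre code; the helper name is hypothetical) of why the added guard matters: without it a None path reaches os.path.abspath() and, on Python 2, dies with an unhelpful AttributeError, whereas the new ValueError tells the user exactly what to do:

    import os

    def get_db_path(dbpath):
        # Fail fast with an actionable message instead of letting
        # os.path.abspath(None) raise a confusing AttributeError.
        if dbpath is None:
            raise ValueError('No saved library path, either run the GUI or use the'
                             ' --with-library option')
        return os.path.abspath(dbpath)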
@ -133,7 +133,7 @@ class Rule(object): # {{{
             'lt': ('1', '', ''),
             'gt': ('', '', '1')
         }[action]
-        return "cmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), %s, '%s', '%s', '%s')" % (col,
+        return "strcmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), '%s', '%s', '%s', '%s')" % (col,
                 val, lt, eq, gt)
 
     def multiple_condition(self, col, action, val, sep):
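For context, a sketch (the 'pubdate' column and cutoff value are assumptions) of the template text the corrected line emits. strcmp compares strings, which suits a yyyy-MM-dd formatted date, whereas cmp expects numbers; note the fix also quotes the value:

    # Sketch only: shows the generated template expression.
    col, val, lt, eq, gt = 'pubdate', '2011-11-01', '1', '', ''
    tmpl = "strcmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), '%s', '%s', '%s', '%s')" % (
            col, val, lt, eq, gt)
    print(tmpl)
    # -> strcmp(format_date(raw_field('pubdate'), 'yyyy-MM-dd'), '2011-11-01', '1', '', '')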
@ -266,7 +266,7 @@ The following functions are available in addition to those described in single-f
 * ``has_cover()`` -- return ``Yes`` if the book has a cover, otherwise return the empty string
 * ``not(value)`` -- returns the string "1" if the value is empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
 * ``list_difference(list1, list2, separator)`` -- return a list made by removing from `list1` any item found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
-* ``list_equals(list1, sep1, list2, sep2, yes_val, no_val) -- return `yes_val` if list1 and list2 contain the same items, otherwise return `no_val`. The items are determined by splitting each list using the appropriate separator character (`sep1` or `sep2`). The order of items in the lists is not relevant. The compare is case insensitive.
+* ``list_equals(list1, sep1, list2, sep2, yes_val, no_val)`` -- return `yes_val` if `list1` and `list2` contain the same items, otherwise return `no_val`. The items are determined by splitting each list using the appropriate separator character (`sep1` or `sep2`). The order of items in the lists is not relevant. The compare is case insensitive.
 * ``list_intersection(list1, list2, separator)`` -- return a list made by removing from `list1` any item not found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
 * ``list_sort(list, direction, separator)`` -- return list sorted using a case-insensitive sort. If `direction` is zero, the list is sorted ascending, otherwise descending. The list items are separated by separator, as are the items in the returned list.
 * ``list_union(list1, list2, separator)`` -- return a list made by merging the items in list1 and list2, removing duplicate items using a case-insensitive compare. If items differ in case, the one in list1 is used. The items in list1 and list2 are separated by separator, as are the items in the returned list.
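As a usage illustration of the corrected signature (the custom column ``#genre`` and its values are hypothetical, not from this commit), ``list_equals`` is called like any other function in template program mode:

    program: list_equals(field('#genre'), ',', 'Fantasy,SF', ',', 'same', 'different')

This returns ``same`` when ``#genre`` holds Fantasy and SF in any order and any case, and ``different`` otherwise.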
@ -18,11 +18,13 @@ def create_mail(from_, to, subject, text=None, attachment_data=None,
     assert text or attachment_data
 
     from email.mime.multipart import MIMEMultipart
+    from email.utils import formatdate
 
     outer = MIMEMultipart()
     outer['Subject'] = subject
     outer['To'] = to
     outer['From'] = from_
+    outer['Date'] = formatdate(localtime=True)
     outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
 
     if text is not None: