Mirror of https://github.com/kovidgoyal/calibre.git

Commit 7ad7c34eee: KG updates
resources/images/news/diariovasco.png  (new binary file, 766 B; not shown)
@@ -5,76 +5,103 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
theatlantic.com
'''
import re
import string

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString

class TheAtlantic(BasicNewsRecipe):

title = 'The Atlantic'
__author__ = 'Kovid Goyal and Sujata Raman'
description = 'Current affairs and politics focussed on the US'
INDEX = 'http://www.theatlantic.com/doc/current'
INDEX = 'http://www.theatlantic.com/magazine/toc/0/'
language = 'en'

remove_tags_before = dict(name='div', id='storytop')
remove_tags = [
dict(name='div', id=['seealso','storybottom', 'footer', 'ad_banner_top', 'sidebar','articletoolstop','subcontent',]),
dict(name='p', attrs={'id':["pagination"]}),
dict(name='table',attrs={'class':"tools"}),
dict(name='style'),
dict(name='a', href='/a/newsletters.mhtml')
]
remove_attributes = ['icap', 'callout', 'style']
no_stylesheets = True
conversion_options = { 'linearize_tables':True }
remove_tags_before = dict(name='div', id='articleHead')
remove_tags_after = dict(id='copyright')
remove_tags = [dict(id=['header', 'printAds', 'pageControls'])]
no_stylesheets = True


def print_version(self, url):
return url.replace('/archive/', '/print/')

extra_css = '''
#timestamp{font-family:Arial,Helvetica,sans-serif; color:#666666 ;font-size:x-small}
#storytype{font-family:Arial,Helvetica,sans-serif; color:#D52B1E ;font-weight:bold; font-size:x-small}
h2{font-family:georgia,serif; font-style:italic;font-size:x-small;font-weight:normal;}
h1{font-family:georgia,serif; font-weight:bold; font-size:large}
#byline{font-family:georgia,serif; font-weight:bold; font-size:x-small}
#topgraf{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
.artsans{{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
'''
def parse_index(self):
articles = []

soup = self.index_to_soup(self.INDEX)
sectit = soup.find('h1', attrs={'class':'sectionTitle'})
if sectit is not None:
texts = sectit.findAll('cufontext')
texts = map(self.tag_to_string, texts[-2:])
self.timefmt = ' [%s]'%(''.join(texts))

issue = soup.find('span', attrs={'class':'issue'})
if issue:
self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')

cover = soup.find('img', alt=re.compile('Cover'), src=True)
cover = soup.find('img', src=True, attrs={'class':'cover'})
if cover is not None:
self.cover_url = 'http://theatlantic.com'+cover['src']
self.cover_url = cover['src']

for item in soup.findAll('div', attrs={'class':'item'}):
a = item.find('a')
if a and a.has_key('href'):
feeds = []
for section in soup.findAll('div', attrs={'class':'magazineSection'}):
section_title = section.find(attrs={'class':'sectionHeader'})
section_title = string.capwords(self.tag_to_string(section_title))
self.log('Found section:', section_title)
articles = []
for post in section.findAll('div', attrs={'class':'post'}):
h = post.find(['h3', 'h4'])
title = self.tag_to_string(h)
a = post.find('a', href=True)
url = a['href']
if not url.startswith('http://'):
url = 'http://www.theatlantic.com/'+url
url = url.replace('/doc/', '/doc/print/')
title = self.tag_to_string(a)
if title in ('VIDEO', 'AUDIO', 'INTERACTIVE MAP', 'SIDEBAR', 'RECIPES'):
continue
title = title.replace('&amp;', '&')
byline = item.find(attrs={'class':'byline'})
date = self.tag_to_string(byline) if byline else ''
description = ''
if url.startswith('/'):
url = 'http://www.theatlantic.com'+url
p = post.find('p', attrs={'class':'dek'})
desc = None
self.log('\tFound article:', title, 'at', url)
if p is not None:
desc = self.tag_to_string(p)
self.log('\t\t', desc)
articles.append({'title':title, 'url':url, 'description':desc,
'date':''})
feeds.append((section_title, articles))

self.log('\tFound article:', title)
self.log('\t\t', url)
poems = []
self.log('Found section: Poems')
for poem in soup.findAll('div', attrs={'class':'poem'}):
title = self.tag_to_string(poem.find('h4'))
desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
url = 'http://www.theatlantic.com'+poem.find('a')['href']
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)
poems.append({'title':title, 'url':url, 'description':desc,
'date':''})
if poems:
feeds.append(('Poems', poems))

articles.append({
'title':title,
'date':date,
'url':url,
'description':description
})
self.log('Found section: Advice')
div = soup.find(id='advice')
title = self.tag_to_string(div.find('h4'))
url = 'http://www.theatlantic.com'+div.find('a')['href']
desc = self.tag_to_string(div.find('p'))
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)

feeds.append(('Advice', [{'title':title, 'url':url, 'description':desc,
'date':''}]))
return feeds

def postprocess_html(self, soup, first):
for table in soup.findAll('table', align='right'):
img = table.find('img')
if img is not None:
img.extract()
caption = self.tag_to_string(table).strip()
div = Tag(soup, 'div')
div['style'] = 'text-align:center'
div.insert(0, img)
div.insert(1, Tag(soup, 'br'))
if caption:
div.insert(2, NavigableString(caption))
table.replaceWith(div)

return soup

return [('Current Issue', articles)]
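Note on the parse_index() rework above: BasicNewsRecipe expects parse_index() to return a list of (section title, article list) tuples, where each article is a dict. A minimal sketch of that return structure, for reference only (the section name and URL below are illustrative, not part of this commit):

    def parse_index(self):
        # Each article is a dict; 'title' and 'url' are the essential keys,
        # 'date' and 'description' may be empty strings.
        articles = [{
            'title': 'Sample article',
            'url': 'http://www.theatlantic.com/doc/sample',  # hypothetical URL
            'date': '',
            'description': '',
        }]
        return [('Current Issue', articles)]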
resources/recipes/diariovasco.recipe  (new file, 50 lines)
@@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.diariovasco.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DiarioVasco(BasicNewsRecipe):
    title = 'Diario Vasco'
    __author__ = 'Darko Miletic'
    description = 'Noticias de pais Vasco y el resto del mundo'
    publisher = 'Diario Vasco'
    category = 'news, politics, Spain'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = 'http://www.diariovasco.com/img/rd.logotipo2_dvasco.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
    }

    keep_only_tags = [
        dict(attrs={'id':'title'})
        ,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
    ]
    remove_tags = [dict(name='ul')]
    remove_attributes = ['width','height']

    feeds = [
        (u'Ultimas Noticias' , u'http://www.diariovasco.com/rss/feeds/ultima.xml' )
        ,(u'Portada' , u'http://www.diariovasco.com/portada.xml' )
        ,(u'Politica' , u'http://www.diariovasco.com/rss/feeds/politica.xml' )
        ,(u'Deportes' , u'http://www.diariovasco.com/rss/feeds/deportes.xml' )
        ,(u'Economia' , u'http://www.diariovasco.com/rss/feeds/economia.xml' )
        ,(u'Mundo' , u'http://www.diariovasco.com/rss/feeds/mundo.xml' )
        ,(u'Cultura' , u'http://www.diariovasco.com/rss/feeds/cultura.xml' )
        ,(u'Gente' , u'http://www.diariovasco.com/rss/feeds/gente.xml' )
        ,(u'Contraportada' , u'http://www.diariovasco.com/rss/feeds/contraportada.xml')
    ]
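As an aside, a new recipe file like the one added above can be exercised from the command line while it is being developed: an invocation along the lines of "ebook-convert diariovasco.recipe output.epub --test -vv" (the output file name is chosen here only for illustration) downloads just a small number of articles per feed and prints verbose logs.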
@@ -119,6 +119,8 @@ class Economist(BasicNewsRecipe):
ns = NavigableString(self.tag_to_string(caption))
div.insert(0, ns)
div.insert(1, Tag(soup, 'br'))
del img['width']
del img['height']
img.extract()
div.insert(2, img)
table.replaceWith(div)

@@ -123,6 +123,8 @@ class Economist(BasicNewsRecipe):
div.insert(0, ns)
div.insert(1, Tag(soup, 'br'))
img.extract()
del img['width']
del img['height']
div.insert(2, img)
table.replaceWith(div)
return soup
resources/recipes/epicurious.recipe  (new file, 58 lines)
@@ -0,0 +1,58 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.epicurious.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe

class Epicurious(BasicNewsRecipe):
    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
                      dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
                      ]

    remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
                   {'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
                   dict(name='div', attrs={'class':['tagged','comments']})
                   ]

    remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]

    feeds = [
             (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
             (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
             (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
             (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
             ]

    match_regexps = [
        r'http://www.epicurious.com/.*recipes/.*/views'
        ]

    preprocess_regexps = [
        (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
        (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
        (re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
        ]

    def postprocess_html(self, soup, first_fetch):
        for t in soup.findAll(['table', 'tr', 'td']):
            t.name = 'div'
        return soup
@@ -3,7 +3,7 @@ import re

class HuffingtonPostRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
__author__ = 'kwetal and Archana Raman'
language = 'en'
version = 2

@@ -14,70 +14,89 @@ class HuffingtonPostRecipe(BasicNewsRecipe):

oldest_article = 1.1
max_articles_per_feed = 100
use_embedded_content = True
#use_embedded_content = True

encoding = 'utf-8'
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True

# Feeds from: http://www.huffingtonpost.com/syndication/
feeds = []
feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))

#feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
#feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
#feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))

#feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
#feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
#feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))

#feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
#feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
#feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))

#feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
#feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
#feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))

#feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
#feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
#feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))

#feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
#feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
#feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))

#feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
#feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
#feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))

#feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
#feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
#feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))

#feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
#feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
#feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))

#feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
#feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
#feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))

feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
#feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))

remove_tags = []
remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
remove_tags.append(dict(name='a', attrs={'class' : 'home_pixie'}))
remove_tags.append(dict(name='div', attrs={'id' : ["top_nav",'threeup_top_wrapper','breaking_news_container',"hp_social_network"]}))
remove_tags.append(dict(name='img', alt="Connect"))
remove_tags.append(dict(name='div', attrs={'class' : ['logo']})) #'share_boxes_box_block_b_wraper',
remove_tags.append(dict(name='div', attrs={'class' :[ 'read_more with_verticals','chicklets_box_outter_v05','blogger_menu_content','chicklets_bar']}))
remove_tags.append(dict(name='div', attrs={'class' : ['sidebar_blog_first_design','sidebar_blog_second_design',]}))
remove_tags.append(dict(name='div', attrs={'class' : ['main_big_news_ontop','login-menu','sidebar_blog_third_design','read_more']}))

remove_tags_after = [dict(name='div', attrs={'class' : 'entry_content'}) ]
# remove_attributes = ['style']

remove_attributes = ['style']

extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
h2{font-size: x-large; font-weight: bold; padding: 0em; margin-bottom: 0.2em;}
a[href]{color: blue; text-decoration: none; cursor: pointer;}
#title_permalink{color:black;font-size:large;}
.date{color:#858585;font-family:"Times New Roman",sans-serif;}
.comments_datetime v05{color:#696969;}
.teaser_permalink{font-style:italic;font-size:xx-small;}
.blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
'''

#a[href]{color: blue; text-decoration: none; cursor: pointer;}
def get_article_url(self, article):
"""
Workaround for Feedparser behaviour. If an item has more than one <link/> element, article.link is empty and

@@ -85,10 +104,21 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
Todo: refactor to searching this list to avoid the hardcoded zero-index
"""
link = article.get('link')
print("Link:"+link)
if not link:
links = article.get('links')
if links:
link = links[0]['href']
if not links[0]['href']:
link = links[1]['href']

return link

def postprocess_html(self, soup, first_fetch):
for tag in soup.findAll('div',text = "What's Your Reaction?"):
tag.extract()

for tg in soup.findAll('blockquote'):
tg.extract()

return soup
resources/recipes/kukuburi.recipe  (new file, 37 lines)
@@ -0,0 +1,37 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
Kukuburi.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class KukuburiRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'en'

    title = u'Kukuburi'
    publisher = u'Ramón Pérez'
    description = u'KUKUBURI by Ram\xc3\xb3n P\xc3\xa9rez'

    no_stylesheets = True
    remove_javascript = True

    oldest_article = 100
    max_articles_per_feed = 100

    feeds = [
        (u'Kukuburi', u'http://feeds2.feedburner.com/Kukuburi')
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<!--.*?-->', lambda match: ''),
            (r'<div class="feedflare".*?</div>', lambda match: '')
        ]
    ]
@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lasegunda.com
'''

@@ -19,43 +17,38 @@ class LaSegunda(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_javascript = True
language = 'es'
masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_empty_feeds = True
language = 'es'
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '

html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]

html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}

keep_only_tags = [dict(name='table')]
remove_tags_before = dict(attrs={'class':'titulonegritastop'})
remove_tags = [dict(name='img')]
remove_attributes = ['width','height']

feeds = [
(u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
,(u'Politica', u'http://www.lasegunda.com/rss20/index.asp?canal=21')
,(u'Cronica', u'http://www.lasegunda.com/rss20/index.asp?canal=20')
,(u'Internacional', u'http://www.lasegunda.com/rss20/index.asp?canal=23')
,(u'Deportes', u'http://www.lasegunda.com/rss20/index.asp?canal=24')
,(u'Epectaculos/Cultura', u'http://www.lasegunda.com/rss20/index.asp?canal=25')
,(u'Educacion', u'http://www.lasegunda.com/rss20/index.asp?canal=26')
,(u'Ciencia y Tecnologia', u'http://www.lasegunda.com/rss20/index.asp?canal=27')
,(u'Solidaridad', u'http://www.lasegunda.com/rss20/index.asp?canal=28')
,(u'Buena Vida', u'http://www.lasegunda.com/rss20/index.asp?canal=32')
,(u'Politica' , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
,(u'Cronica' , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
,(u'Internacional' , u'http://www.lasegunda.com/rss20/index.asp?canal=23')
,(u'Deportes' , u'http://www.lasegunda.com/rss20/index.asp?canal=24')
,(u'Epectaculos/Cultura' , u'http://www.lasegunda.com/rss20/index.asp?canal=25')
,(u'Educacion' , u'http://www.lasegunda.com/rss20/index.asp?canal=26')
,(u'Ciencia y Tecnologia' , u'http://www.lasegunda.com/rss20/index.asp?canal=27')
,(u'Solidaridad' , u'http://www.lasegunda.com/rss20/index.asp?canal=28')
,(u'Buena Vida' , u'http://www.lasegunda.com/rss20/index.asp?canal=32')
]

def print_version(self, url):
rest, sep, article_id = url.partition('index.asp?idnoticia=')
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id

def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
latercera.com
'''

@@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
remove_javascript = True
use_embedded_content = False

html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]

html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_empty_feeds = True
language = 'es'

conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}

keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]

remove_tags = [
dict(name='script')
,dict(name='ul')
dict(name=['ul','input','base'])
,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
,dict(name='input')
,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
]

feeds = [
(u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
,(u'Pais', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=654')
,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')

@@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
]

def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup

language = 'es'
@@ -11,7 +11,8 @@ import sys, re, os, platform
is64bit = platform.architecture()[0] == '64bit'
iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform
islinux = not isosx and not iswindows
isfreebsd = 'freebsd' in sys.platform
islinux = not isosx and not iswindows and not isfreebsd
SRC = os.path.abspath('src')
sys.path.insert(0, SRC)
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')

@@ -117,7 +118,7 @@ class Command(object):
self.real_user = os.environ.get('SUDO_USER', None)

def drop_privileges(self):
if not islinux or isosx:
if not islinux or isosx or isfreebsd:
return
if self.real_user is not None:
self.info('Dropping privileges to those of', self.real_user+':',

@@ -128,7 +129,7 @@ class Command(object):
os.seteuid(int(self.real_uid))

def regain_privileges(self):
if not islinux or isosx:
if not islinux or isosx or isfreebsd:
return
if os.geteuid() != 0 and self.orig_euid == 0:
self.info('Trying to get root privileges')
@@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
fc_lib = '/usr/lib'
podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib'
chmlib_inc_dirs = chmlib_lib_dirs = []

if iswindows:
prefix = r'C:\cygwin\home\kovid\sw'

@@ -96,6 +97,10 @@ if iswindows:
sw_lib_dir = os.path.join(prefix, 'lib')
fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
fc_lib = sw_lib_dir
chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
'build', 'chmlib-0.40', 'src'))
chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
'build', 'chmlib-0.40', 'src', 'Release'))
png_inc_dirs = [sw_inc_dir]
png_lib_dirs = [sw_lib_dir]
png_libs = ['png12']
@@ -11,15 +11,16 @@ from distutils import sysconfig

from PyQt4.pyqtconfig import QtGuiModuleMakefile

from setup import Command, islinux, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, \
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
jpg_lib_dirs, chmlib_lib_dirs
MT
isunix = islinux or isosx
isunix = islinux or isosx or isfreebsd

make = 'make' if isunix else NMAKE

@@ -56,6 +57,22 @@ if iswindows:
pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']

extensions = [

Extension('chmlib',
['calibre/utils/chm/swig_chm.c'],
libraries=['ChmLib' if iswindows else 'chm'],
inc_dirs=chmlib_inc_dirs,
lib_dirs=chmlib_lib_dirs,
cflags=["-DSWIG_COBJECT_TYPES"]),

Extension('chm_extra',
['calibre/utils/chm/extra.c'],
libraries=['ChmLib' if iswindows else 'chm'],
inc_dirs=chmlib_inc_dirs,
lib_dirs=chmlib_lib_dirs,
cflags=["-D__PYTHON__"]),

Extension('pdfreflow',
reflow_sources,
headers=reflow_headers,

@@ -154,6 +171,13 @@ if islinux:
ldflags.append('-lpython'+sysconfig.get_python_version())

if isfreebsd:
cflags.append('-pthread')
ldflags.append('-shared')
cflags.append('-I'+sysconfig.get_python_inc())
ldflags.append('-lpython'+sysconfig.get_python_version())

if isosx:
x, p = ('i386', 'ppc')
archs = ['-arch', x, '-arch', p, '-isysroot',
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex

from setup import Command, islinux, basenames, modules, functions, \
from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
__appname__, __version__

HEADER = '''\

@@ -116,7 +116,7 @@ class Develop(Command):

def pre_sub_commands(self, opts):
if not islinux:
if not (islinux or isfreebsd):
self.info('\nSetting up a source based development environment is only '
'supported on linux. On other platforms, see the User Manual'
' for help with setting up a development environment.')

@@ -156,7 +156,7 @@ class Develop(Command):
self.warn('Failed to compile mount helper. Auto mounting of',
' devices will not work')

if os.geteuid() != 0:
if not isfreebsd and os.geteuid() != 0:
return self.warn('Must be run as root to compile mount helper. Auto '
'mounting of devices will not work.')
src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')

@@ -168,9 +168,10 @@ class Develop(Command):
ret = p.wait()
if ret != 0:
return warn()
os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
if not isfreebsd:
os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
self.manifest.append(dest)
return dest
@@ -42,6 +42,7 @@ class LinuxFreeze(Command):
'/usr/lib/liblcms.so.1',
'/tmp/calibre-mount-helper',
'/usr/lib/libunrar.so',
'/usr/lib/libchm.so.0',
'/usr/lib/libsqlite3.so.0',
'/usr/lib/libsqlite3.so.0',
'/usr/lib/libmng.so.1',
@@ -459,7 +459,7 @@ class Py2App(object):

@flush
def add_misc_libraries(self):
for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
info('\nAdding', x)
x = 'lib%s.dylib'%x
shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
@@ -12,7 +12,7 @@ warnings.simplefilter('ignore', DeprecationWarning)

from calibre.startup import plugins, winutil, winutilerror
from calibre.constants import iswindows, isosx, islinux, isfrozen, \
from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
terminal_controller, preferred_encoding, \
__appname__, __version__, __author__, \
win32event, win32api, winerror, fcntl, \

@@ -22,7 +22,7 @@ import mechanize
if False:
winutil, winutilerror, __appname__, islinux, __version__
fcntl, win32event, isfrozen, __author__, terminal_controller
winerror, win32api
winerror, win32api, isfreebsd

mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
@@ -22,7 +22,8 @@ terminal_controller = TerminalController(sys.stdout)
iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
isosx = 'darwin' in sys.platform.lower()
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
islinux = not(iswindows or isosx)
isfreebsd = 'freebsd' in sys.platform.lower()
islinux = not(iswindows or isosx or isfreebsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux

@@ -56,7 +57,8 @@ if plugins is None:
sys.path.insert(0, plugin_path)

for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
'fontconfig', 'pdfreflow', 'progress_indicator'] + \
'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
'chm_extra'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
@@ -119,11 +119,34 @@ class Plugin(object):

def __enter__(self, *args):
if self.plugin_path is not None:
sys.path.insert(0, self.plugin_path)
from calibre.utils.zipfile import ZipFile
zf = ZipFile(self.plugin_path)
extensions = set([x.rpartition('.')[-1].lower() for x in
zf.namelist()])
zip_safe = True
for ext in ('pyd', 'so', 'dll', 'dylib'):
if ext in extensions:
zip_safe = False
if zip_safe:
sys.path.insert(0, self.plugin_path)
self._sys_insertion_path = self.plugin_path
else:
from calibre.ptempfile import TemporaryDirectory
self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
self._sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
zf.extractall(self._sys_insertion_path)
sys.path.insert(0, self._sys_insertion_path)
zf.close()

def __exit__(self, *args):
if self.plugin_path in sys.path:
sys.path.remove(self.plugin_path)
ip, it = getattr(self, '_sys_insertion_path', None), getattr(self,
'_sys_insertion_tdir', None)
if ip in sys.path:
sys.path.remove(ip)
if hasattr(it, '__exit__'):
it.__exit__(*args)


class FileTypePlugin(Plugin):
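A minimal sketch of how the __enter__/__exit__ pair added above is meant to be used (the plugin instance and the module imported inside the block are hypothetical, not taken from this commit):

    # Code bundled in a plugin zip becomes importable inside the with-block:
    # __enter__ puts either the zip itself (if it contains no compiled
    # extensions) or an unpacked temporary directory on sys.path, and
    # __exit__ removes that entry again and cleans up the temporary directory.
    with plugin:
        from plugin_module import some_hook   # hypothetical module and name
        some_hook()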
@@ -103,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
mi.cover_data = (ext.lower(), data)
return mi

class CHMMetadataReader(MetadataReaderPlugin):

name = 'Read CHM metadata'
file_types = set(['chm'])
description = _('Read metadata from %s files') % 'CHM'

def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.chm import get_metadata
return get_metadata(stream)


class EPUBMetadataReader(MetadataReaderPlugin):

name = 'Read EPUB metadata'

@@ -384,6 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput

from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output

@@ -444,6 +456,7 @@ plugins += [
TCRInput,
TXTInput,
LRFInput,
CHMInput,
]
plugins += [
EPUBOutput,
@@ -401,7 +401,7 @@ def initialize_plugins():
plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
except PluginNotFound:
continue
plugin = initialize_plugin(plugin, zfp if not isinstance(zfp, type) else zfp)
plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
_initialized_plugins.append(plugin)
except:
print 'Failed to initialize plugin...'
@@ -23,6 +23,8 @@ Run an embedded python interpreter.
help='Debug the specified device driver.')
parser.add_option('-g', '--gui', default=False, action='store_true',
help='Run the GUI',)
parser.add_option('-w', '--viewer', default=False, action='store_true',
help='Run the ebook viewer',)
parser.add_option('--paths', default=False, action='store_true',
help='Output the paths necessary to setup the calibre environment')
parser.add_option('--migrate', action='store_true', default=False,

@@ -98,6 +100,12 @@ def main(args=sys.argv):
if opts.gui:
from calibre.gui2.main import main
main(['calibre'])
elif opts.viewer:
from calibre.gui2.viewer.main import main
vargs = ['ebook-viewer', '--debug-javascript']
if len(args) > 1:
vargs.append(args[-1])
main(vargs)
elif opts.command:
sys.argv = args[:1]
exec opts.command
@@ -15,7 +15,7 @@ class ANDROID(USBMS):
supported_platforms = ['windows', 'osx', 'linux']

# Ordered list of supported formats
FORMATS = ['epub']
FORMATS = ['epub', 'pdf']

VENDOR_ID = {
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
|
||||
c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
|
||||
from errno import EBUSY, ENOMEM
|
||||
|
||||
from calibre import iswindows, isosx, load_library
|
||||
from calibre import iswindows, isosx, isfreebsd, load_library
|
||||
|
||||
_libusb_name = 'libusb'
|
||||
PATH_MAX = 511 if iswindows else 1024 if isosx else 4096
|
||||
PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
|
||||
if iswindows:
|
||||
class Structure(_Structure):
|
||||
_pack_ = 1
|
||||
|
@@ -398,16 +398,6 @@ class Device(DeviceConfig, DevicePlugin):
if len(matches) > 2:
drives['cardb'] = matches[2]

pat = self.OSX_MAIN_MEM_VOL_PAT
if pat is not None and len(drives) > 1 and 'main' in drives:
if pat.search(drives['main']) is None:
main = drives['main']
for x in ('carda', 'cardb'):
if x in drives and pat.search(drives[x]):
drives['main'] = drives.pop(x)
drives[x] = main
break

return drives

def osx_bsd_names(self):

@@ -431,6 +421,16 @@ class Device(DeviceConfig, DevicePlugin):
if drives['main'] is None:
print bsd_drives, mount_map, drives
raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
pat = self.OSX_MAIN_MEM_VOL_PAT
if pat is not None and len(drives) > 1 and 'main' in drives:
if pat.search(drives['main']) is None:
main = drives['main']
for x in ('carda', 'cardb'):
if x in drives and pat.search(drives[x]):
drives['main'] = drives.pop(x)
drives[x] = main
break

self._main_prefix = drives['main']+os.sep
def get_card_prefix(c):
ans = drives.get(c, None)
@@ -1,213 +1,17 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'

import os, shutil, uuid, re
from tempfile import mkdtemp
from mimetypes import guess_type as guess_mimetype
import os, uuid

from BeautifulSoup import BeautifulSoup, NavigableString
from lxml import html
from pychm.chm import CHMFile
from pychm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate,
)

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename


def match_string(s1, s2_already_lowered):
if s1 is not None and s2_already_lowered is not None:
if s1.lower()==s2_already_lowered:
return True
return False

def check_all_prev_empty(tag):
if tag is None:
return True
if tag.__class__ == NavigableString and not check_empty(tag):
return False
return check_all_prev_empty(tag.previousSibling)

def check_empty(s, rex = re.compile(r'\S')):
return rex.search(s) is None


def option_parser():
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help=_("Set the book title"))
parser.add_option('--title-sort', action='store', type='string', default=None,
dest='title_sort', help=_('Set sort key for the title'))
parser.add_option("-a", "--author", action="store", type="string", \
dest="author", help=_("Set the author"))
parser.add_option('--author-sort', action='store', type='string', default=None,
dest='author_sort', help=_('Set sort key for the author'))
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help=_("The category this book belongs"
" to. E.g.: History"))
parser.add_option("--thumbnail", action="store", type="string", \
dest="thumbnail", help=_("Path to a graphic that will be"
" set as this files' thumbnail"))
parser.add_option("--comment", action="store", type="string", \
dest="freetext", help=_("Path to a txt file containing a comment."))
parser.add_option("--get-thumbnail", action="store_true", \
dest="get_thumbnail", default=False, \
help=_("Extract thumbnail from LRF file"))
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
parser.add_option('--classification', default=None, help=_('Set the book classification'))
parser.add_option('--creator', default=None, help=_('Set the book creator'))
parser.add_option('--producer', default=None, help=_('Set the book producer'))
parser.add_option('--get-cover', action='store_true', default=False,
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
parser.add_option('--bookid', action='store', type='string', default=None,
dest='book_id', help=_('Set book ID'))
parser.add_option('--font-delta', action='store', type='int', default=0,
dest='font_delta', help=_('Set font delta'))
return parser

class CHMError(Exception):
pass

class CHMReader(CHMFile):
def __init__(self, input, log):
CHMFile.__init__(self)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self._sourcechm = input
self._contents = None
self._playorder = 0
self._metadata = False
self._extracted = False

# location of '.hhc' file, which is the CHM TOC.
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc"


def _parse_toc(self, ul, basedir=os.getcwdu()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1
for li in ul('li', recursive=False):
href = li.object('param', {'name': 'Local'})[0]['value']
if href.count('#'):
href, frag = href.split('#')
else:
frag = None
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
#print "========>", name
toc.add_item(href, frag, name, play_order=self._playorder)
self._playorder += 1
if li.ul:
child = self._parse_toc(li.ul)
child.parent = toc
toc.append(child)
#print toc
return toc


def GetFile(self, path):
# have to have abs paths for ResolveObject, but Contents() deliberately
# makes them relative. So we don't have to worry, re-add the leading /.
# note this path refers to the internal CHM structure
if path[0] != '/':
path = '/' + path
res, ui = self.ResolveObject(path)
if res != CHM_RESOLVE_SUCCESS:
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
size, data = self.RetrieveObject(ui)
if size == 0:
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data

def ExtractFiles(self, output_dir=os.getcwdu()):
for path in self.Contents():
lpath = os.path.join(output_dir, path)
self._ensure_dir(lpath)
data = self.GetFile(path)
with open(lpath, 'wb') as f:
if guess_mimetype(path)[0] == ('text/html'):
data = self._reformat(data)
f.write(data)
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
self._extracted = True

def _reformat(self, data):
try:
soup = BeautifulSoup(data)
except UnicodeEncodeError:
# hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :("
return data
# nuke javascript...
[s.extract() for s in soup('script')]
# remove forward and back nav bars from the top/bottom of each page
# cos they really fuck with the flow of things and generally waste space
# since we can't use [a,b] syntax to select arbitrary items from a list
# we'll have to do this manually...
t = soup('table')
if t:
if (t[0].previousSibling is None
or t[0].previousSibling.previousSibling is None):
t[0].extract()
if (t[-1].nextSibling is None
or t[-1].nextSibling.nextSibling is None):
t[-1].extract()
# for some very odd reason each page's content appears to be in a table
# too. and this table has sub-tables for random asides... grr.

# remove br at top of page if present after nav bars removed
br = soup('br')
if br:
if check_all_prev_empty(br[0].previousSibling):
br[0].extract()

# some images seem to be broken in some chm's :/
for img in soup('img'):
try:
# some are supposedly "relative"... lies.
while img['src'].startswith('../'): img['src'] = img['src'][3:]
# some have ";<junk>" at the end.
img['src'] = img['src'].split(';')[0]
except KeyError:
# and some don't even have a src= ?!
pass
# now give back some pretty html.
return soup.prettify()

def Contents(self):
if self._contents is not None:
return self._contents
paths = []
def get_paths(chm, ui, ctx):
# skip directories
# note this path refers to the internal CHM structure
if ui.path[-1] != '/':
# and make paths relative
paths.append(ui.path.lstrip('/'))
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
self._contents = paths
return self._contents

def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)

def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)


class CHMInput(InputFormatPlugin):

name = 'CHM Input'

@@ -215,12 +19,8 @@ class CHMInput(InputFormatPlugin):
description = 'Convert CHM files to OEB'
file_types = set(['chm'])

options = set([
OptionRecommendation(name='dummy_option', recommended_value=False,
help=_('dummy option until real options are determined.')),
])

def _chmtohtml(self, output_dir, chm_path, no_images, log):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log)
log.debug('Extracting CHM to %s' % output_dir)

@@ -230,37 +30,36 @@ class CHMInput(InputFormatPlugin):

def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.metadata.chm import get_metadata_
from calibre.customize.ui import plugin_for_input_format

log.debug('Processing CHM...')
tdir = mkdtemp(prefix='chm2oeb_')
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
no_images = False #options.no_images
chm_name = stream.name
#chm_data = stream.read()
with TemporaryDirectory('chm2oeb') as tdir:
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
no_images = False #options.no_images
chm_name = stream.name
#chm_data = stream.read()

#closing stream so CHM can be opened by external library
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname)
#closing stream so CHM can be opened by external library
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname)

metadata = get_metadata_(tdir)
metadata = get_metadata_(tdir)

odi = options.debug_pipeline
options.debug_pipeline = None
# try a custom conversion:
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
# try using html converter:
htmlpath = self._create_html_root(mainpath, log)
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
options.debug_pipeline = odi
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
shutil.rmtree(tdir)
odi = options.debug_pipeline
options.debug_pipeline = None
# try a custom conversion:
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
# try using html converter:
htmlpath = self._create_html_root(mainpath, log)
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
options.debug_pipeline = odi
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
return oeb

def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):

@@ -369,6 +168,8 @@ class CHMInput(InputFormatPlugin):
# check that node is a normal node (not a comment, DOCTYPE, etc.)
# (normal nodes have string tags)
if isinstance(node.tag, basestring):
from calibre.ebooks.chm.reader import match_string

if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
for child in node:
if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):
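The main change to CHMInput.convert() above is replacing the hand-managed mkdtemp()/shutil.rmtree() pair with calibre's TemporaryDirectory context manager; the pattern in isolation looks roughly like this (the helper called inside the block is a stand-in for the real extraction work, not an actual calibre function):

    from calibre.ptempfile import TemporaryDirectory

    with TemporaryDirectory('chm2oeb') as tdir:
        # tdir is a freshly created temporary directory; it is removed
        # automatically when the with-block exits, even on exceptions.
        extract_chm_to(tdir)   # hypothetical helper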
src/calibre/ebooks/chm/reader.py  (new file, 207 lines)
@@ -0,0 +1,207 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
        ' and Alex Bramley <a.bramley at gmail.com>.'

import os, re
from mimetypes import guess_type as guess_mimetype

from BeautifulSoup import BeautifulSoup, NavigableString

from calibre.utils.chm.chm import CHMFile
from calibre.utils.chm.chmlib import (
    CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
    chm_enumerate,
)

from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC


def match_string(s1, s2_already_lowered):
    if s1 is not None and s2_already_lowered is not None:
        if s1.lower()==s2_already_lowered:
            return True
    return False

def check_all_prev_empty(tag):
    if tag is None:
        return True
    if tag.__class__ == NavigableString and not check_empty(tag):
        return False
    return check_all_prev_empty(tag.previousSibling)

def check_empty(s, rex = re.compile(r'\S')):
    return rex.search(s) is None


def option_parser():
    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
    parser.add_option("-t", "--title", action="store", type="string", \
                      dest="title", help=_("Set the book title"))
    parser.add_option('--title-sort', action='store', type='string', default=None,
                      dest='title_sort', help=_('Set sort key for the title'))
    parser.add_option("-a", "--author", action="store", type="string", \
                      dest="author", help=_("Set the author"))
    parser.add_option('--author-sort', action='store', type='string', default=None,
                      dest='author_sort', help=_('Set sort key for the author'))
    parser.add_option("-c", "--category", action="store", type="string", \
                      dest="category", help=_("The category this book belongs"
                      " to. E.g.: History"))
    parser.add_option("--thumbnail", action="store", type="string", \
                      dest="thumbnail", help=_("Path to a graphic that will be"
                      " set as this files' thumbnail"))
    parser.add_option("--comment", action="store", type="string", \
                      dest="freetext", help=_("Path to a txt file containing a comment."))
    parser.add_option("--get-thumbnail", action="store_true", \
                      dest="get_thumbnail", default=False, \
                      help=_("Extract thumbnail from LRF file"))
    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
    parser.add_option('--classification', default=None, help=_('Set the book classification'))
    parser.add_option('--creator', default=None, help=_('Set the book creator'))
    parser.add_option('--producer', default=None, help=_('Set the book producer'))
    parser.add_option('--get-cover', action='store_true', default=False,
                      help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
    parser.add_option('--bookid', action='store', type='string', default=None,
                      dest='book_id', help=_('Set book ID'))
    parser.add_option('--font-delta', action='store', type='int', default=0,
                      dest='font_delta', help=_('Set font delta'))
    return parser

class CHMError(Exception):
    pass

class CHMReader(CHMFile):
    def __init__(self, input, log):
        CHMFile.__init__(self)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
||||
self.log = log
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
self._metadata = False
|
||||
self._extracted = False
|
||||
|
||||
# location of '.hhc' file, which is the CHM TOC.
|
||||
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
|
||||
self.hhc_path = self.root + ".hhc"
|
||||
|
||||
|
||||
def _parse_toc(self, ul, basedir=os.getcwdu()):
|
||||
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
|
||||
self._playorder += 1
|
||||
for li in ul('li', recursive=False):
|
||||
href = li.object('param', {'name': 'Local'})[0]['value']
|
||||
if href.count('#'):
|
||||
href, frag = href.split('#')
|
||||
else:
|
||||
frag = None
|
||||
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
|
||||
#print "========>", name
|
||||
toc.add_item(href, frag, name, play_order=self._playorder)
|
||||
self._playorder += 1
|
||||
if li.ul:
|
||||
child = self._parse_toc(li.ul)
|
||||
child.parent = toc
|
||||
toc.append(child)
|
||||
#print toc
|
||||
return toc
|
||||
|
||||
|
||||
def GetFile(self, path):
|
||||
# have to have abs paths for ResolveObject, but Contents() deliberately
|
||||
# makes them relative. So we don't have to worry, re-add the leading /.
|
||||
# note this path refers to the internal CHM structure
|
||||
if path[0] != '/':
|
||||
path = '/' + path
|
||||
res, ui = self.ResolveObject(path)
|
||||
if res != CHM_RESOLVE_SUCCESS:
|
||||
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
|
||||
size, data = self.RetrieveObject(ui)
|
||||
if size == 0:
|
||||
raise CHMError("'%s' is zero bytes in length!"%(path,))
|
||||
return data
|
||||
|
||||
def ExtractFiles(self, output_dir=os.getcwdu()):
|
||||
for path in self.Contents():
|
||||
lpath = os.path.join(output_dir, path)
|
||||
self._ensure_dir(lpath)
|
||||
data = self.GetFile(path)
|
||||
with open(lpath, 'wb') as f:
|
||||
if guess_mimetype(path)[0] == ('text/html'):
|
||||
data = self._reformat(data)
|
||||
f.write(data)
|
||||
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
|
||||
self._extracted = True
|
||||
|
||||
def _reformat(self, data):
|
||||
try:
|
||||
soup = BeautifulSoup(data)
|
||||
except UnicodeEncodeError:
|
||||
# hit some strange encoding problems...
|
||||
print "Unable to parse html for cleaning, leaving it :("
|
||||
return data
|
||||
# nuke javascript...
|
||||
[s.extract() for s in soup('script')]
|
||||
# remove forward and back nav bars from the top/bottom of each page
|
||||
# cos they really fuck with the flow of things and generally waste space
|
||||
# since we can't use [a,b] syntax to select arbitrary items from a list
|
||||
# we'll have to do this manually...
|
||||
t = soup('table')
|
||||
if t:
|
||||
if (t[0].previousSibling is None
|
||||
or t[0].previousSibling.previousSibling is None):
|
||||
t[0].extract()
|
||||
if (t[-1].nextSibling is None
|
||||
or t[-1].nextSibling.nextSibling is None):
|
||||
t[-1].extract()
|
||||
# for some very odd reason each page's content appears to be in a table
|
||||
# too. and this table has sub-tables for random asides... grr.
|
||||
|
||||
# remove br at top of page if present after nav bars removed
|
||||
br = soup('br')
|
||||
if br:
|
||||
if check_all_prev_empty(br[0].previousSibling):
|
||||
br[0].extract()
|
||||
|
||||
# some images seem to be broken in some chm's :/
|
||||
for img in soup('img'):
|
||||
try:
|
||||
# some are supposedly "relative"... lies.
|
||||
while img['src'].startswith('../'): img['src'] = img['src'][3:]
|
||||
# some have ";<junk>" at the end.
|
||||
img['src'] = img['src'].split(';')[0]
|
||||
except KeyError:
|
||||
# and some don't even have a src= ?!
|
||||
pass
|
||||
# now give back some pretty html.
|
||||
return soup.prettify()
|
||||
|
||||
def Contents(self):
|
||||
if self._contents is not None:
|
||||
return self._contents
|
||||
paths = []
|
||||
def get_paths(chm, ui, ctx):
|
||||
# skip directories
|
||||
# note this path refers to the internal CHM structure
|
||||
if ui.path[-1] != '/':
|
||||
# and make paths relative
|
||||
paths.append(ui.path.lstrip('/'))
|
||||
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
|
||||
self._contents = paths
|
||||
return self._contents
|
||||
|
||||
def _ensure_dir(self, path):
|
||||
dir = os.path.dirname(path)
|
||||
if not os.path.isdir(dir):
|
||||
os.makedirs(dir)
|
||||
|
||||
def extract_content(self, output_dir=os.getcwdu()):
|
||||
self.ExtractFiles(output_dir=output_dir)
|
||||
|
||||
|
||||
|
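For orientation, a minimal usage sketch of the CHMReader class added above; the file name and output directory are placeholders, and default_log from calibre.utils.logging is assumed as a convenient Log instance:

from calibre.ebooks.chm.reader import CHMReader
from calibre.utils.logging import default_log

# Open a CHM, extract its cleaned-up HTML into a directory and locate the
# .hhc file that CHMInput later parses into a table of contents.
reader = CHMReader('book.chm', default_log)
reader.extract_content(output_dir='chm_extracted')
print reader.hhc_path
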
@ -731,7 +731,8 @@ OptionRecommendation(name='timestamp',
|
||||
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
|
||||
'periodical.downloaded_recipe'), 'w')
|
||||
zf.add_dir(out_dir)
|
||||
self.input_plugin.save_download(zf)
|
||||
with self.input_plugin:
|
||||
self.input_plugin.save_download(zf)
|
||||
zf.close()
|
||||
|
||||
self.log.info('Input debug saved to:', out_dir)
|
||||
@ -780,28 +781,29 @@ OptionRecommendation(name='timestamp',
|
||||
self.ui_reporter(0.01, _('Converting input to HTML...'))
|
||||
ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
|
||||
self.input_plugin.report_progress = ir
|
||||
self.oeb = self.input_plugin(stream, self.opts,
|
||||
self.input_fmt, self.log,
|
||||
accelerators, tdir)
|
||||
if self.opts.debug_pipeline is not None:
|
||||
self.dump_input(self.oeb, tdir)
|
||||
if self.abort_after_input_dump:
|
||||
return
|
||||
if self.input_fmt in ('recipe', 'downloaded_recipe'):
|
||||
self.opts_to_mi(self.user_metadata)
|
||||
if not hasattr(self.oeb, 'manifest'):
|
||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts,
|
||||
self.input_plugin)
|
||||
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
|
||||
self.opts.is_image_collection = self.input_plugin.is_image_collection
|
||||
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
|
||||
self.flush()
|
||||
if self.opts.debug_pipeline is not None:
|
||||
out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
|
||||
self.dump_oeb(self.oeb, out_dir)
|
||||
self.log('Parsed HTML written to:', out_dir)
|
||||
self.input_plugin.specialize(self.oeb, self.opts, self.log,
|
||||
self.output_fmt)
|
||||
with self.input_plugin:
|
||||
self.oeb = self.input_plugin(stream, self.opts,
|
||||
self.input_fmt, self.log,
|
||||
accelerators, tdir)
|
||||
if self.opts.debug_pipeline is not None:
|
||||
self.dump_input(self.oeb, tdir)
|
||||
if self.abort_after_input_dump:
|
||||
return
|
||||
if self.input_fmt in ('recipe', 'downloaded_recipe'):
|
||||
self.opts_to_mi(self.user_metadata)
|
||||
if not hasattr(self.oeb, 'manifest'):
|
||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts,
|
||||
self.input_plugin)
|
||||
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
|
||||
self.opts.is_image_collection = self.input_plugin.is_image_collection
|
||||
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
|
||||
self.flush()
|
||||
if self.opts.debug_pipeline is not None:
|
||||
out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
|
||||
self.dump_oeb(self.oeb, out_dir)
|
||||
self.log('Parsed HTML written to:', out_dir)
|
||||
self.input_plugin.specialize(self.oeb, self.opts, self.log,
|
||||
self.output_fmt)
|
||||
|
||||
pr(0., _('Running transforms on ebook...'))
|
||||
|
||||
@ -891,7 +893,8 @@ OptionRecommendation(name='timestamp',
|
||||
our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
|
||||
self.output_plugin.report_progress = our
|
||||
our(0., _('Creating')+' %s'%self.output_plugin.name)
|
||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||
with self.output_plugin:
|
||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||
self.opts, self.log)
|
||||
self.ui_reporter(1.)
|
||||
run_plugins_on_postprocess(self.output, self.output_fmt)
|
||||
|
@ -157,11 +157,9 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
|
||||
self.workaround_ade_quirks()
|
||||
self.workaround_webkit_quirks()
|
||||
self.workaround_sony_quirks()
|
||||
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
|
||||
RescaleImages()(oeb, opts)
|
||||
|
||||
|
||||
from calibre.ebooks.oeb.transforms.split import Split
|
||||
split = Split(not self.opts.dont_split_on_page_breaks,
|
||||
max_flow_size=self.opts.flow_size*1024
|
||||
@ -170,6 +168,8 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
|
||||
self.insert_cover()
|
||||
|
||||
self.workaround_sony_quirks()
|
||||
|
||||
with TemporaryDirectory('_epub_output') as tdir:
|
||||
from calibre.customize.ui import plugin_for_output_format
|
||||
oeb_output = plugin_for_output_format('oeb')
|
||||
|
@ -20,7 +20,7 @@ from itertools import izip
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.constants import islinux
|
||||
from calibre.constants import islinux, isfreebsd
|
||||
from calibre import unicode_path
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
@ -346,7 +346,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
self.added_resources = {}
|
||||
self.log = log
|
||||
for path, href in htmlfile_map.items():
|
||||
if not islinux:
|
||||
if not (islinux or isfreebsd):
|
||||
path = path.lower()
|
||||
self.added_resources[path] = href
|
||||
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
||||
@ -417,7 +417,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
if os.path.isdir(link):
|
||||
self.log.warn(link_, 'is a link to a directory. Ignoring.')
|
||||
return link_
|
||||
if not islinux:
|
||||
if not (islinux or isfreebsd):
|
||||
link = link.lower()
|
||||
if link not in self.added_resources:
|
||||
bhref = os.path.basename(link)
|
||||
|
@ -215,6 +215,28 @@ def merge_results(one, two):
|
||||
else:
|
||||
one[idx].smart_update(x)
|
||||
|
||||
class MetadataSources(object):
|
||||
|
||||
def __init__(self, sources):
|
||||
self.sources = sources
|
||||
|
||||
def __enter__(self):
|
||||
for s in self.sources:
|
||||
s.__enter__()
|
||||
return self
|
||||
|
||||
def __exit__(self, *args):
|
||||
for s in self.sources:
|
||||
s.__exit__()
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
for s in self.sources:
|
||||
s(*args, **kwargs)
|
||||
|
||||
def join(self):
|
||||
for s in self.sources:
|
||||
s.join()
|
||||
|
||||
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
||||
verbose=0):
|
||||
assert not(title is None and author is None and publisher is None and \
|
||||
@ -224,11 +246,10 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
||||
if isbn is not None:
|
||||
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
|
||||
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
|
||||
with MetadataSources(fetchers) as manager:
|
||||
manager(title, author, publisher, isbn, verbose)
|
||||
manager.join()
|
||||
|
||||
for fetcher in fetchers:
|
||||
fetcher(title, author, publisher, isbn, verbose)
|
||||
for fetcher in fetchers:
|
||||
fetcher.join()
|
||||
results = list(fetchers[0].results)
|
||||
for fetcher in fetchers[1:]:
|
||||
merge_results(results, fetcher.results)
|
||||
@ -243,10 +264,9 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
||||
def get_social_metadata(mi, verbose=0):
|
||||
from calibre.customize.ui import metadata_sources
|
||||
fetchers = list(metadata_sources(metadata_type='social'))
|
||||
for fetcher in fetchers:
|
||||
fetcher(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
|
||||
for fetcher in fetchers:
|
||||
fetcher.join()
|
||||
with MetadataSources(fetchers) as manager:
|
||||
manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
|
||||
manager.join()
|
||||
ratings, tags, comments = [], set([]), set([])
|
||||
for fetcher in fetchers:
|
||||
if fetcher.results:
|
||||
|
@ -4,13 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files
'''

import functools
import os
import re
import struct
import textwrap
import cStringIO
import sys
import functools, shutil, os, re, struct, textwrap, cStringIO, sys

try:
from PIL import Image as PILImage
@ -620,6 +614,16 @@ class MobiReader(object):
|
||||
* opf.cover.split('/'))):
|
||||
opf.cover = None
|
||||
|
||||
cover = opf.cover
|
||||
if cover is not None:
|
||||
cover = cover.replace('/', os.sep)
|
||||
if os.path.exists(cover):
|
||||
ncover = 'images'+os.sep+'calibre_cover.jpg'
|
||||
if os.path.exists(ncover):
|
||||
os.remove(ncover)
|
||||
shutil.copyfile(cover, ncover)
|
||||
opf.cover = ncover.replace(os.sep, '/')
|
||||
|
||||
manifest = [(htmlfile, 'application/xhtml+xml'),
|
||||
(os.path.abspath('styles.css'), 'text/css')]
|
||||
bp = os.path.dirname(htmlfile)
|
||||
|
@ -173,7 +173,8 @@ class EbookIterator(object):
|
||||
plumber.opts.no_process = True
|
||||
|
||||
plumber.input_plugin.for_viewer = True
|
||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||
with plumber.input_plugin:
|
||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||
plumber.opts, plumber.input_fmt, self.log,
|
||||
{}, self.base)
|
||||
|
||||
|
@ -13,7 +13,7 @@ from functools import partial
|
||||
|
||||
from calibre.ebooks import ConversionError, DRMError
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre import isosx, iswindows, islinux
|
||||
from calibre import isosx, iswindows, islinux, isfreebsd
|
||||
from calibre import CurrentDir
|
||||
|
||||
PDFTOHTML = 'pdftohtml'
|
||||
@ -23,7 +23,7 @@ if isosx and hasattr(sys, 'frameworks_dir'):
|
||||
if iswindows and hasattr(sys, 'frozen'):
|
||||
PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
|
||||
popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
|
||||
if islinux and getattr(sys, 'frozen_path', False):
|
||||
if (islinux or isfreebsd) and getattr(sys, 'frozen_path', False):
|
||||
PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
|
||||
|
||||
def pdftohtml(output_dir, pdf_path, no_images):
|
||||
|
@ -142,7 +142,7 @@ class RTFMLizer(object):
|
||||
def image_to_hexstring(self, data):
|
||||
im = Image.open(cStringIO.StringIO(data))
|
||||
data = cStringIO.StringIO()
|
||||
im.save(data, 'JPEG')
|
||||
im.convert('RGB').save(data, 'JPEG')
|
||||
data = data.getvalue()
|
||||
|
||||
raw_hex = ''
|
||||
|
@ -12,7 +12,7 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
|
||||
|
||||
ORG_NAME = 'KovidsBrain'
|
||||
APP_UID = 'libprs500'
|
||||
from calibre import islinux, iswindows, isosx
|
||||
from calibre import islinux, iswindows, isosx, isfreebsd
|
||||
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
|
||||
from calibre.utils.localization import set_qt_translator
|
||||
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
|
||||
@ -579,7 +579,7 @@ _store_app = None
|
||||
|
||||
def is_ok_to_use_qt():
|
||||
global gui_thread, _store_app
|
||||
if islinux and ':' not in os.environ.get('DISPLAY', ''):
|
||||
if (islinux or isfreebsd) and ':' not in os.environ.get('DISPLAY', ''):
|
||||
return False
|
||||
if _store_app is None and QApplication.instance() is None:
|
||||
_store_app = QApplication([])
|
||||
|
@ -25,7 +25,7 @@ from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, \
|
||||
authors_to_string, check_isbn
|
||||
from calibre.ebooks.metadata.library_thing import cover_from_isbn
|
||||
from calibre import islinux
|
||||
from calibre import islinux, isfreebsd
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.utils.config import prefs, tweaks
|
||||
from calibre.utils.date import qt_to_dt
|
||||
@ -389,8 +389,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
height_of_rest = self.frameGeometry().height() - self.cover.height()
|
||||
width_of_rest = self.frameGeometry().width() - self.cover.width()
|
||||
ag = QCoreApplication.instance().desktop().availableGeometry(self)
|
||||
self.cover.MAX_HEIGHT = ag.height()-(25 if islinux else 0)-height_of_rest
|
||||
self.cover.MAX_WIDTH = ag.width()-(25 if islinux else 0)-width_of_rest
|
||||
self.cover.MAX_HEIGHT = ag.height()-(25 if (islinux or isfreebsd) else 0)-height_of_rest
|
||||
self.cover.MAX_WIDTH = ag.width()-(25 if (islinux or isfreebsd) else 0)-width_of_rest
|
||||
if cover:
|
||||
pm = QPixmap()
|
||||
pm.loadFromData(cover)
|
||||
|
@ -5,7 +5,7 @@ import sys, logging, os, traceback, time
|
||||
from PyQt4.QtGui import QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon
|
||||
from PyQt4.QtCore import Qt, QObject, SIGNAL, QCoreApplication, QThread
|
||||
|
||||
from calibre import __appname__, setup_cli_handlers, islinux
|
||||
from calibre import __appname__, setup_cli_handlers, islinux, isfreebsd
|
||||
from calibre.ebooks.lrf.lrfparser import LRFDocument
|
||||
|
||||
from calibre.gui2 import ORG_NAME, APP_UID, error_dialog, \
|
||||
@ -258,7 +258,7 @@ def file_renderer(stream, opts, parent=None, logger=None):
|
||||
level = logging.DEBUG if opts.verbose else logging.INFO
|
||||
logger = logging.getLogger('lrfviewer')
|
||||
setup_cli_handlers(logger, level)
|
||||
if islinux:
|
||||
if islinux or isfreebsd:
|
||||
try: # Set lrfviewer as the default for LRF files for this user
|
||||
from subprocess import call
|
||||
call('xdg-mime default calibre-lrfviewer.desktop application/lrf', shell=True)
|
||||
@ -307,7 +307,7 @@ def main(args=sys.argv, logger=None):
|
||||
if hasattr(opts, 'help'):
|
||||
parser.print_help()
|
||||
return 1
|
||||
pid = os.fork() if islinux else -1
|
||||
pid = os.fork() if (islinux or isfreebsd) else -1
|
||||
if pid <= 0:
|
||||
app = Application(args)
|
||||
app.setWindowIcon(QIcon(I('viewer.svg')))
|
||||
|
@ -20,7 +20,7 @@ from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
|
||||
info_dialog, error_dialog
|
||||
from calibre.ebooks.oeb.iterator import EbookIterator
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre.constants import islinux
|
||||
from calibre.constants import islinux, isfreebsd
|
||||
from calibre.utils.config import Config, StringConfig, dynamic
|
||||
from calibre.gui2.search_box import SearchBox2
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
@ -686,7 +686,7 @@ View an ebook.
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(args)
|
||||
pid = os.fork() if False and islinux else -1
|
||||
pid = os.fork() if False and (islinux or isfreebsd) else -1
|
||||
if pid <= 0:
|
||||
app = Application(args)
|
||||
app.setWindowIcon(QIcon(I('viewer.svg')))
|
||||
|
@ -1294,7 +1294,7 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
|
||||
if entry_type:
|
||||
user_notes[location] = dict(type=entry_type, id=self.id,
|
||||
text=data[eo+8:eo+8+rec_len].decode('utf-16-be'))
|
||||
text=data[eo+8:eo+8+rec_len].decode('utf-16-be'))
|
||||
#print " %2d: %s %s" % (current_entry, entry_type,'at %d' % location if location else '')
|
||||
#if current_block == 'text_block':
|
||||
#self.textdump(text)
|
||||
@ -1307,12 +1307,17 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
while sig == 'BKMK':
|
||||
# Fix start location for Highlights using BKMK data
|
||||
end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
|
||||
#print "looking for end_loc %d in BKMK" % end_loc
|
||||
if end_loc in user_notes and user_notes[end_loc]['type'] != 'Note':
|
||||
start, = unpack('>I', data[eo+8:eo+12])
|
||||
user_notes[start] = user_notes[end_loc]
|
||||
user_notes.pop(end_loc)
|
||||
#print "changing start location of %d to %d" % (end_loc,start)
|
||||
else:
|
||||
# If a bookmark coincides with a user annotation, the locs could
|
||||
# be the same - cheat by nudging -1
|
||||
# Skip bookmark for last_read_location
|
||||
if end_loc != self.last_read_location:
|
||||
user_notes[end_loc - 1] = dict(type='Bookmark',id=self.id,text=None)
|
||||
rec_len, = unpack('>I', data[eo+4:eo+8])
|
||||
eo += rec_len + 8
|
||||
sig = data[eo:eo+4]
|
||||
@ -3361,7 +3366,7 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
self.ncxSoup = ncx_soup
|
||||
|
||||
def updateLibraryComments(self):
|
||||
# Push user notes back to library
|
||||
# Append user notes to library book['comments'], catalog book['description']
|
||||
from calibre.library.cli import send_message as calibre_send_message
|
||||
|
||||
if self.bookmarked_books:
|
||||
@ -3395,22 +3400,30 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
user_notes = self.bookmarked_books[id][0].user_notes
|
||||
annotations = []
|
||||
|
||||
'''
|
||||
spanTag = Tag(ka_soup, 'span')
|
||||
spanTag['style'] = 'font-style:italic;font-weight:bold'
|
||||
spanTag.insert(0,NavigableString("Kindle Annotations"))
|
||||
divTag.insert(dtc, spanTag)
|
||||
dtc += 1
|
||||
divTag.insert(dtc, Tag(ka_soup,'br'))
|
||||
dtc += 1
|
||||
'''
|
||||
if False:
|
||||
spanTag = Tag(ka_soup, 'span')
|
||||
spanTag['style'] = 'font-style:italic;font-weight:bold;text-align:right'
|
||||
spanTag.insert(0,NavigableString("Kindle Annotations"))
|
||||
divTag.insert(dtc, spanTag)
|
||||
dtc += 1
|
||||
divTag.insert(dtc, Tag(ka_soup,'br'))
|
||||
dtc += 1
|
||||
|
||||
# Add the annotations sorted by location
|
||||
# Italicize highlighted text
|
||||
for location in sorted(user_notes):
|
||||
annotations.append('<b>Location %d • %s</b><br />%s<br />' % \
|
||||
self.magicKindleLocationCalculator(location),
|
||||
user_notes[location]['type'],
|
||||
user_notes[location]['text'])
|
||||
if user_notes[location]['text']:
|
||||
annotations.append('<b>Location %d • %s</b><br />%s<br />' % \
|
||||
(self.magicKindleLocationCalculator(location),
|
||||
user_notes[location]['type'],
|
||||
user_notes[location]['text'] if \
|
||||
user_notes[location]['type'] == 'Note' else \
|
||||
'<i>%s</i>' % user_notes[location]['text']))
|
||||
else:
|
||||
annotations.append('<b>Location %d • %s</b><br />' % \
|
||||
(self.magicKindleLocationCalculator(location),
|
||||
user_notes[location]['type']))
|
||||
|
||||
for annotation in annotations:
|
||||
divTag.insert(dtc, annotation)
|
||||
dtc += 1
|
||||
|
@ -196,6 +196,8 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
|
||||
for x in data:
|
||||
if isinstance(x['fmt_epub'], unicode):
|
||||
x['fmt_epub'] = x['fmt_epub'].encode('utf-8')
|
||||
if isinstance(x['cover'], unicode):
|
||||
x['cover'] = x['cover'].encode('utf-8')
|
||||
template = MarkupTemplate(STANZA_TEMPLATE)
|
||||
return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
|
||||
sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')
|
||||
|
@ -263,7 +263,7 @@ class ResultCache(SearchQueryParser):
|
||||
if item[loc].strip() != '':
|
||||
continue
|
||||
matches.add(item[0])
|
||||
break
|
||||
continue
|
||||
continue ### item is empty. No possible matches below
|
||||
|
||||
if q == 'true':
|
||||
|
@ -7,6 +7,7 @@ import sys, os, shutil, cPickle, textwrap, stat
|
||||
from subprocess import check_call
|
||||
|
||||
from calibre import __appname__, prints
|
||||
from calibre.constants import islinux, isfreebsd
|
||||
|
||||
|
||||
entry_points = {
|
||||
@ -128,20 +129,23 @@ class PostInstall:
|
||||
self.icon_resources = []
|
||||
self.menu_resources = []
|
||||
self.mime_resources = []
|
||||
self.setup_completion()
|
||||
self.setup_udev_rules()
|
||||
if islinux:
|
||||
self.setup_completion()
|
||||
self.setup_udev_rules()
|
||||
self.install_man_pages()
|
||||
self.setup_desktop_integration()
|
||||
if islinux:
|
||||
self.setup_desktop_integration()
|
||||
self.create_uninstaller()
|
||||
|
||||
from calibre.utils.config import config_dir
|
||||
if os.path.exists(config_dir):
|
||||
os.chdir(config_dir)
|
||||
for f in os.listdir('.'):
|
||||
if os.stat(f).st_uid == 0:
|
||||
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
|
||||
if os.stat(config_dir).st_uid == 0:
|
||||
os.rmdir(config_dir)
|
||||
if islinux:
|
||||
for f in os.listdir('.'):
|
||||
if os.stat(f).st_uid == 0:
|
||||
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
|
||||
if os.stat(config_dir).st_uid == 0:
|
||||
os.rmdir(config_dir)
|
||||
|
||||
if warn is None and self.warnings:
|
||||
self.info('There were %d warnings'%len(self.warnings))
|
||||
@ -318,7 +322,10 @@ class PostInstall:
|
||||
def install_man_pages(self):
|
||||
try:
|
||||
from calibre.utils.help2man import create_man_page
|
||||
manpath = os.path.join(self.opts.staging_sharedir, 'man/man1')
|
||||
if isfreebsd:
|
||||
manpath = os.path.join(self.opts.staging_root, 'man/man1')
|
||||
else:
|
||||
manpath = os.path.join(self.opts.staging_sharedir, 'man/man1')
|
||||
if not os.path.exists(manpath):
|
||||
os.makedirs(manpath)
|
||||
self.info('Installing MAN pages...')
|
||||
@ -331,7 +338,10 @@ class PostInstall:
|
||||
continue
|
||||
parser = parser()
|
||||
raw = create_man_page(prog, parser)
|
||||
manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
|
||||
if isfreebsd:
|
||||
manfile = os.path.join(manpath, prog+'.1')
|
||||
else:
|
||||
manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
|
||||
self.info('\tInstalling MAN page for', prog)
|
||||
open(manfile, 'wb').write(raw)
|
||||
self.manifest.append(manfile)
|
||||
|
@ -204,6 +204,10 @@ terminal. For example, you can start the GUI from the terminal as::

calibre-debug -g

Similarly, you can start the ebook-viewer as::

calibre-debug -w /path/to/file/to/be/viewed

Executing arbitrary scripts in the calibre python environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

34
src/calibre/utils/chm/__init__.py
Normal file
@ -0,0 +1,34 @@
|
||||
## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||
|
||||
## pychm is free software; you can redistribute it and/or
|
||||
## modify it under the terms of the GNU General Public License as
|
||||
## published by the Free Software Foundation; either version 2 of the
|
||||
## License, or (at your option) any later version.
|
||||
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
## General Public License for more details.
|
||||
|
||||
## You should have received a copy of the GNU General Public
|
||||
## License along with this program; see the file COPYING. If not,
|
||||
## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
## Boston, MA 02111-1307, USA
|
||||
|
||||
## $Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $
|
||||
|
||||
'''
|
||||
chm - A package to manipulate CHM files
|
||||
|
||||
The chm package provides four modules: chm, chmlib, extra and
|
||||
_chmlib. _chmlib and chmlib are very low level libraries generated
|
||||
from SWIG interface files, and are simple wrappers around the API
|
||||
defined by the C library chmlib.
|
||||
The extra module adds full-text search support.
|
||||
the chm module provides some higher level classes to simplify
|
||||
access to the CHM files information.
|
||||
'''
|
||||
__all__ = ["chm", "chmlib", "_chmlib", "extra"]
|
||||
__version__ = "0.8.4"
|
||||
__revision__ = "$Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $"
|
||||
|
512
src/calibre/utils/chm/chm.py
Normal file
@ -0,0 +1,512 @@
|
||||
## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||
|
||||
## Based on code by:
|
||||
## Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
|
||||
|
||||
## pychm is free software; you can redistribute it and/or
|
||||
## modify it under the terms of the GNU General Public License as
|
||||
## published by the Free Software Foundation; either version 2 of the
|
||||
## License, or (at your option) any later version.
|
||||
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
## General Public License for more details.
|
||||
|
||||
## You should have received a copy of the GNU General Public
|
||||
## License along with this program; see the file COPYING. If not,
|
||||
## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
## Boston, MA 02111-1307, USA
|
||||
|
||||
## $Id: chm.py,v 1.12 2006/08/07 12:31:51 rubensr Exp $
|
||||
|
||||
'''
|
||||
chm - A high-level front end for the chmlib python module.
|
||||
|
||||
The chm module provides high level access to the functionality
|
||||
included in chmlib. It encapsulates functions in the CHMFile class, and
|
||||
provides some additional features, such as the ability to obtain
|
||||
the contents tree of a CHM archive.
|
||||
|
||||
'''
|
||||
|
||||
import array
|
||||
import string
|
||||
import sys
|
||||
|
||||
import calibre.utils.chm.chmlib as chmlib
|
||||
from calibre.constants import plugins
|
||||
|
||||
extra, extra_err = plugins['chm_extra']
|
||||
if extra_err:
|
||||
raise RuntimeError('Failed to load chm.extra: '+extra_err)
|
||||
|
||||
charset_table = {
|
||||
0 : 'iso8859_1', # ANSI_CHARSET
|
||||
238 : 'iso8859_2', # EASTEUROPE_CHARSET
|
||||
178 : 'iso8859_6', # ARABIC_CHARSET
|
||||
161 : 'iso8859_7', # GREEK_CHARSET
|
||||
177 : 'iso8859_8', # HEBREW_CHARSET
|
||||
162 : 'iso8859_9', # TURKISH_CHARSET
|
||||
222 : 'iso8859_11', # THAI_CHARSET - hmm not in python 2.2...
|
||||
186 : 'iso8859_13', # BALTIC_CHARSET
|
||||
204 : 'cp1251', # RUSSIAN_CHARSET
|
||||
255 : 'cp437', # OEM_CHARSET
|
||||
128 : 'cp932', # SHIFTJIS_CHARSET
|
||||
134 : 'cp936', # GB2312_CHARSET
|
||||
129 : 'cp949', # HANGUL_CHARSET
|
||||
136 : 'cp950', # CHINESEBIG5_CHARSET
|
||||
1 : None, # DEFAULT_CHARSET
|
||||
2 : None, # SYMBOL_CHARSET
|
||||
130 : None, # JOHAB_CHARSET
|
||||
163 : None, # VIETNAMESE_CHARSET
|
||||
77 : None, # MAC_CHARSET
|
||||
}
|
||||
|
||||
locale_table = {
|
||||
0x0436 : ('iso8859_1', "Afrikaans", "Western Europe & US"),
|
||||
0x041c : ('iso8859_2', "Albanian", "Central Europe"),
|
||||
0x0401 : ('iso8859_6', "Arabic_Saudi_Arabia", "Arabic"),
|
||||
0x0801 : ('iso8859_6', "Arabic_Iraq", "Arabic"),
|
||||
0x0c01 : ('iso8859_6', "Arabic_Egypt", "Arabic"),
|
||||
0x1001 : ('iso8859_6', "Arabic_Libya", "Arabic"),
|
||||
0x1401 : ('iso8859_6', "Arabic_Algeria", "Arabic"),
|
||||
0x1801 : ('iso8859_6', "Arabic_Morocco", "Arabic"),
|
||||
0x1c01 : ('iso8859_6', "Arabic_Tunisia", "Arabic"),
|
||||
0x2001 : ('iso8859_6', "Arabic_Oman", "Arabic"),
|
||||
0x2401 : ('iso8859_6', "Arabic_Yemen", "Arabic"),
|
||||
0x2801 : ('iso8859_6', "Arabic_Syria", "Arabic"),
|
||||
0x2c01 : ('iso8859_6', "Arabic_Jordan", "Arabic"),
|
||||
0x3001 : ('iso8859_6', "Arabic_Lebanon", "Arabic"),
|
||||
0x3401 : ('iso8859_6', "Arabic_Kuwait", "Arabic"),
|
||||
0x3801 : ('iso8859_6', "Arabic_UAE", "Arabic"),
|
||||
0x3c01 : ('iso8859_6', "Arabic_Bahrain", "Arabic"),
|
||||
0x4001 : ('iso8859_6', "Arabic_Qatar", "Arabic"),
|
||||
0x042b : (None, "Armenian","Armenian"),
|
||||
0x042c : ('iso8859_9', "Azeri_Latin", "Turkish"),
|
||||
0x082c : ('cp1251', "Azeri_Cyrillic", "Cyrillic"),
|
||||
0x042d : ('iso8859_1', "Basque", "Western Europe & US"),
|
||||
0x0423 : ('cp1251', "Belarusian", "Cyrillic"),
|
||||
0x0402 : ('cp1251', "Bulgarian", "Cyrillic"),
|
||||
0x0403 : ('iso8859_1', "Catalan", "Western Europe & US"),
|
||||
0x0404 : ('cp950', "Chinese_Taiwan", "Traditional Chinese"),
|
||||
0x0804 : ('cp936', "Chinese_PRC", "Simplified Chinese"),
|
||||
0x0c04 : ('cp950', "Chinese_Hong_Kong", "Traditional Chinese"),
|
||||
0x1004 : ('cp936', "Chinese_Singapore", "Simplified Chinese"),
|
||||
0x1404 : ('cp950', "Chinese_Macau", "Traditional Chinese"),
|
||||
0x041a : ('iso8859_2', "Croatian", "Central Europe"),
|
||||
0x0405 : ('iso8859_2', "Czech", "Central Europe"),
|
||||
0x0406 : ('iso8859_1', "Danish", "Western Europe & US"),
|
||||
0x0413 : ('iso8859_1', "Dutch_Standard", "Western Europe & US"),
|
||||
0x0813 : ('iso8859_1', "Dutch_Belgian", "Western Europe & US"),
|
||||
0x0409 : ('iso8859_1', "English_United_States", "Western Europe & US"),
|
||||
0x0809 : ('iso8859_1', "English_United_Kingdom", "Western Europe & US"),
|
||||
0x0c09 : ('iso8859_1', "English_Australian", "Western Europe & US"),
|
||||
0x1009 : ('iso8859_1', "English_Canadian", "Western Europe & US"),
|
||||
0x1409 : ('iso8859_1', "English_New_Zealand", "Western Europe & US"),
|
||||
0x1809 : ('iso8859_1', "English_Irish", "Western Europe & US"),
|
||||
0x1c09 : ('iso8859_1', "English_South_Africa", "Western Europe & US"),
|
||||
0x2009 : ('iso8859_1', "English_Jamaica", "Western Europe & US"),
|
||||
0x2409 : ('iso8859_1', "English_Caribbean", "Western Europe & US"),
|
||||
0x2809 : ('iso8859_1', "English_Belize", "Western Europe & US"),
|
||||
0x2c09 : ('iso8859_1', "English_Trinidad", "Western Europe & US"),
|
||||
0x3009 : ('iso8859_1', "English_Zimbabwe", "Western Europe & US"),
|
||||
0x3409 : ('iso8859_1', "English_Philippines", "Western Europe & US"),
|
||||
0x0425 : ('iso8859_13',"Estonian", "Baltic",),
|
||||
0x0438 : ('iso8859_1', "Faeroese", "Western Europe & US"),
|
||||
0x0429 : ('iso8859_6', "Farsi", "Arabic"),
|
||||
0x040b : ('iso8859_1', "Finnish", "Western Europe & US"),
|
||||
0x040c : ('iso8859_1', "French_Standard", "Western Europe & US"),
|
||||
0x080c : ('iso8859_1', "French_Belgian", "Western Europe & US"),
|
||||
0x0c0c : ('iso8859_1', "French_Canadian", "Western Europe & US"),
|
||||
0x100c : ('iso8859_1', "French_Swiss", "Western Europe & US"),
|
||||
0x140c : ('iso8859_1', "French_Luxembourg", "Western Europe & US"),
|
||||
0x180c : ('iso8859_1', "French_Monaco", "Western Europe & US"),
|
||||
0x0437 : (None, "Georgian", "Georgian"),
|
||||
0x0407 : ('iso8859_1', "German_Standard", "Western Europe & US"),
|
||||
0x0807 : ('iso8859_1', "German_Swiss", "Western Europe & US"),
|
||||
0x0c07 : ('iso8859_1', "German_Austrian", "Western Europe & US"),
|
||||
0x1007 : ('iso8859_1', "German_Luxembourg", "Western Europe & US"),
|
||||
0x1407 : ('iso8859_1', "German_Liechtenstein", "Western Europe & US"),
|
||||
0x0408 : ('iso8859_7', "Greek", "Greek"),
|
||||
0x040d : ('iso8859_8', "Hebrew", "Hebrew"),
|
||||
0x0439 : (None, "Hindi", "Indic"),
|
||||
0x040e : ('iso8859_2', "Hungarian", "Central Europe"),
|
||||
0x040f : ('iso8859_1', "Icelandic", "Western Europe & US"),
|
||||
0x0421 : ('iso8859_1', "Indonesian", "Western Europe & US"),
|
||||
0x0410 : ('iso8859_1', "Italian_Standard", "Western Europe & US"),
|
||||
0x0810 : ('iso8859_1', "Italian_Swiss", "Western Europe & US"),
|
||||
0x0411 : ('cp932', "Japanese", "Japanese"),
|
||||
0x043f : ('cp1251', "Kazakh", "Cyrillic"),
|
||||
0x0457 : (None, "Konkani", "Indic"),
|
||||
0x0412 : ('cp949', "Korean", "Korean"),
|
||||
0x0426 : ('iso8859_13',"Latvian", "Baltic",),
|
||||
0x0427 : ('iso8859_13',"Lithuanian", "Baltic",),
|
||||
0x042f : ('cp1251', "Macedonian", "Cyrillic"),
|
||||
0x043e : ('iso8859_1', "Malay_Malaysia", "Western Europe & US"),
|
||||
0x083e : ('iso8859_1', "Malay_Brunei_Darussalam", "Western Europe & US"),
|
||||
0x044e : (None, "Marathi", "Indic"),
|
||||
0x0414 : ('iso8859_1', "Norwegian_Bokmal", "Western Europe & US"),
|
||||
0x0814 : ('iso8859_1', "Norwegian_Nynorsk", "Western Europe & US"),
|
||||
0x0415 : ('iso8859_2', "Polish", "Central Europe"),
|
||||
0x0416 : ('iso8859_1', "Portuguese_Brazilian", "Western Europe & US"),
|
||||
0x0816 : ('iso8859_1', "Portuguese_Standard", "Western Europe & US"),
|
||||
0x0418 : ('iso8859_2', "Romanian", "Central Europe"),
|
||||
0x0419 : ('cp1251', "Russian", "Cyrillic"),
|
||||
0x044f : (None, "Sanskrit", "Indic"),
|
||||
0x081a : ('iso8859_2', "Serbian_Latin", "Central Europe"),
|
||||
0x0c1a : ('cp1251', "Serbian_Cyrillic", "Cyrillic"),
|
||||
0x041b : ('iso8859_2', "Slovak", "Central Europe"),
|
||||
0x0424 : ('iso8859_2', "Slovenian", "Central Europe"),
|
||||
0x040a : ('iso8859_1', "Spanish_Trad_Sort", "Western Europe & US"),
|
||||
0x080a : ('iso8859_1', "Spanish_Mexican", "Western Europe & US"),
|
||||
0x0c0a : ('iso8859_1', "Spanish_Modern_Sort", "Western Europe & US"),
|
||||
0x100a : ('iso8859_1', "Spanish_Guatemala", "Western Europe & US"),
|
||||
0x140a : ('iso8859_1', "Spanish_Costa_Rica", "Western Europe & US"),
|
||||
0x180a : ('iso8859_1', "Spanish_Panama", "Western Europe & US"),
|
||||
0x1c0a : ('iso8859_1', "Spanish_Dominican_Repub", "Western Europe & US"),
|
||||
0x200a : ('iso8859_1', "Spanish_Venezuela", "Western Europe & US"),
|
||||
0x240a : ('iso8859_1', "Spanish_Colombia", "Western Europe & US"),
|
||||
0x280a : ('iso8859_1', "Spanish_Peru", "Western Europe & US"),
|
||||
0x2c0a : ('iso8859_1', "Spanish_Argentina", "Western Europe & US"),
|
||||
0x300a : ('iso8859_1', "Spanish_Ecuador", "Western Europe & US"),
|
||||
0x340a : ('iso8859_1', "Spanish_Chile", "Western Europe & US"),
|
||||
0x380a : ('iso8859_1', "Spanish_Uruguay", "Western Europe & US"),
|
||||
0x3c0a : ('iso8859_1', "Spanish_Paraguay", "Western Europe & US"),
|
||||
0x400a : ('iso8859_1', "Spanish_Bolivia", "Western Europe & US"),
|
||||
0x440a : ('iso8859_1', "Spanish_El_Salvador", "Western Europe & US"),
|
||||
0x480a : ('iso8859_1', "Spanish_Honduras", "Western Europe & US"),
|
||||
0x4c0a : ('iso8859_1', "Spanish_Nicaragua", "Western Europe & US"),
|
||||
0x500a : ('iso8859_1', "Spanish_Puerto_Rico", "Western Europe & US"),
|
||||
0x0441 : ('iso8859_1', "Swahili", "Western Europe & US"),
|
||||
0x041d : ('iso8859_1', "Swedish", "Western Europe & US"),
|
||||
0x081d : ('iso8859_1', "Swedish_Finland", "Western Europe & US"),
|
||||
0x0449 : (None, "Tamil", "Indic"),
|
||||
0x0444 : ('cp1251', "Tatar", "Cyrillic"),
|
||||
0x041e : ('iso8859_11',"Thai", "Thai"),
|
||||
0x041f : ('iso8859_9', "Turkish", "Turkish"),
|
||||
0x0422 : ('cp1251', "Ukrainian", "Cyrillic"),
|
||||
0x0420 : ('iso8859_6', "Urdu", "Arabic"),
|
||||
0x0443 : ('iso8859_9', "Uzbek_Latin", "Turkish"),
|
||||
0x0843 : ('cp1251', "Uzbek_Cyrillic", "Cyrillic"),
|
||||
0x042a : (None, "Vietnamese", "Vietnamese")
|
||||
}
|
||||
|
||||
class CHMFile:
|
||||
"A class to manage access to CHM files."
|
||||
filename = ""
|
||||
file = None
|
||||
title = ""
|
||||
home = "/"
|
||||
index = None
|
||||
topics = None
|
||||
encoding = None
|
||||
lcid = None
|
||||
binaryindex = None
|
||||
|
||||
def __init__(self):
|
||||
self.searchable = 0
|
||||
|
||||
def LoadCHM(self, archiveName):
|
||||
'''Loads a CHM archive.
|
||||
This function will also call GetArchiveInfo to obtain information
|
||||
such as the index file name and the topics file. It returns 1 on
|
||||
success, and 0 if it fails.
|
||||
'''
|
||||
if (self.filename != None):
|
||||
self.CloseCHM()
|
||||
|
||||
self.file = chmlib.chm_open(archiveName)
|
||||
if (self.file == None):
|
||||
return 0
|
||||
|
||||
self.filename = archiveName
|
||||
self.GetArchiveInfo()
|
||||
|
||||
return 1
|
||||
|
||||
def CloseCHM(self):
|
||||
'''Closes the CHM archive.
|
||||
This function will close the CHM file, if it is open. All variables
|
||||
are also reset.
|
||||
'''
|
||||
if (self.filename != None):
|
||||
chmlib.chm_close(self.file)
|
||||
self.file = None
|
||||
self.filename = ''
|
||||
self.title = ""
|
||||
self.home = "/"
|
||||
self.index = None
|
||||
self.topics = None
|
||||
self.encoding = None
|
||||
|
||||
def GetArchiveInfo(self):
|
||||
'''Obtains information on CHM archive.
|
||||
This function checks the /#SYSTEM file inside the CHM archive to
|
||||
obtain the index, home page, topics, encoding and title. It is called
|
||||
from LoadCHM.
|
||||
'''
|
||||
|
||||
#extra.is_searchable crashed...
|
||||
#self.searchable = extra.is_searchable (self.file)
|
||||
self.searchable = False
|
||||
self.lcid = None
|
||||
|
||||
result, ui = chmlib.chm_resolve_object(self.file, '/#SYSTEM')
|
||||
if (result != chmlib.CHM_RESOLVE_SUCCESS):
|
||||
sys.stderr.write('GetArchiveInfo: #SYSTEM does not exist\n')
|
||||
return 0
|
||||
|
||||
size, text = chmlib.chm_retrieve_object(self.file, ui, 4l, ui.length)
|
||||
if (size == 0):
|
||||
sys.stderr.write('GetArchiveInfo: file size = 0\n')
|
||||
return 0
|
||||
|
||||
buff = array.array('B', text)
|
||||
|
||||
index = 0
|
||||
while (index < size):
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
|
||||
if (cursor == 0):
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
self.topics = '/' + text[index:index+cursor-1]
|
||||
elif (cursor == 1):
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
self.index = '/' + text[index:index+cursor-1]
|
||||
elif (cursor == 2):
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
self.home = '/' + text[index:index+cursor-1]
|
||||
elif (cursor == 3):
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
self.title = text[index:index+cursor-1]
|
||||
elif (cursor == 4):
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
self.lcid = buff[index] + (buff[index+1] * 256)
|
||||
elif (cursor == 6):
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
tmp = text[index:index+cursor-1]
|
||||
if not self.topics:
|
||||
tmp1 = '/' + tmp + '.hhc'
|
||||
tmp2 = '/' + tmp + '.hhk'
|
||||
res1, ui1 = chmlib.chm_resolve_object(self.file, tmp1)
|
||||
res2, ui2 = chmlib.chm_resolve_object(self.file, tmp2)
|
||||
if (not self.topics) and \
|
||||
(res1 == chmlib.CHM_RESOLVE_SUCCESS):
|
||||
self.topics = '/' + tmp + '.hhc'
|
||||
if (not self.index) and \
|
||||
(res2 == chmlib.CHM_RESOLVE_SUCCESS):
|
||||
self.index = '/' + tmp + '.hhk'
|
||||
elif (cursor == 16):
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
self.encoding = text[index:index+cursor-1]
|
||||
else:
|
||||
index += 2
|
||||
cursor = buff[index] + (buff[index+1] * 256)
|
||||
index += 2
|
||||
index += cursor
|
||||
|
||||
self.GetWindowsInfo()
|
||||
|
||||
if not self.lcid:
|
||||
self.lcid = extra.get_lcid (self.file)
|
||||
|
||||
return 1
|
||||
|
||||
def GetTopicsTree(self):
|
||||
'''Reads and returns the topics tree.
|
||||
This auxiliary function reads and returns the topics tree file
|
||||
contents for the CHM archive.
|
||||
'''
|
||||
if (self.topics == None):
|
||||
return None
|
||||
|
||||
if self.topics:
|
||||
res, ui = chmlib.chm_resolve_object(self.file, self.topics)
|
||||
if (res != chmlib.CHM_RESOLVE_SUCCESS):
|
||||
return None
|
||||
|
||||
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
|
||||
if (size == 0):
|
||||
sys.stderr.write('GetTopicsTree: file size = 0\n')
|
||||
return None
|
||||
return text
|
||||
|
||||
def GetIndex(self):
|
||||
'''Reads and returns the index tree.
|
||||
This auxiliary function reads and returns the index tree file
|
||||
contents for the CHM archive.
|
||||
'''
|
||||
if (self.index == None):
|
||||
return None
|
||||
|
||||
if self.index:
|
||||
res, ui = chmlib.chm_resolve_object(self.file, self.index)
|
||||
if (res != chmlib.CHM_RESOLVE_SUCCESS):
|
||||
return None
|
||||
|
||||
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
|
||||
if (size == 0):
|
||||
sys.stderr.write('GetIndex: file size = 0\n')
|
||||
return None
|
||||
return text
|
||||
|
||||
def ResolveObject(self, document):
|
||||
'''Tries to locate a document in the archive.
|
||||
This function tries to locate the document inside the archive. It
|
||||
returns a tuple where the first element is zero if the function
|
||||
was successful, and the second is the UnitInfo for that document.
|
||||
The UnitInfo is used to retrieve the document contents
|
||||
'''
|
||||
if self.file:
|
||||
#path = os.path.abspath(document)
|
||||
path = document
|
||||
return chmlib.chm_resolve_object(self.file, path)
|
||||
else:
|
||||
return (1, None)
|
||||
|
||||
def RetrieveObject(self, ui, start = -1, length = -1):
|
||||
'''Retrieves the contents of a document.
|
||||
This function takes a UnitInfo and two optional arguments, the first
|
||||
being the start address and the second is the length. These define
|
||||
the amount of data to be read from the archive.
|
||||
'''
|
||||
if self.file and ui:
|
||||
if length == -1:
|
||||
len = ui.length
|
||||
else:
|
||||
len = length
|
||||
if start == -1:
|
||||
st = 0l
|
||||
else:
|
||||
st = long(start)
|
||||
return chmlib.chm_retrieve_object(self.file, ui, st, len)
|
||||
else:
|
||||
return (0, '')
|
||||
|
||||
def Search(self, text, wholewords=0, titleonly=0):
|
||||
'''Performs full-text search on the archive.
|
||||
The first parameter is the word to look for, the second
|
||||
indicates if the search should be for whole words only, and
|
||||
the third parameter indicates if the search should be
|
||||
restricted to page titles.
|
||||
This method will return a tuple, the first item
|
||||
indicating if the search results were partial, and the second
|
||||
item being a dictionary containing the results.'''
|
||||
if text and text != '' and self.file:
|
||||
return extra.search (self.file, text, wholewords,
|
||||
titleonly)
|
||||
else:
|
||||
return None
|
||||
|
||||
def IsSearchable(self):
|
||||
'''Indicates if the full-text search is available for this
|
||||
archive - this flag is updated when GetArchiveInfo is called'''
|
||||
return self.searchable
|
||||
|
||||
def GetEncoding(self):
|
||||
'''Returns a string that can be used with the codecs python package
|
||||
to encode or decode the files in the chm archive. If an error is
|
||||
found, or if it is not possible to find the encoding, None is
|
||||
returned.'''
|
||||
if self.encoding:
|
||||
vals = string.split(self.encoding, ',')
|
||||
if len(vals) > 2:
|
||||
try:
|
||||
return charset_table[int(vals[2])]
|
||||
except KeyError:
|
||||
pass
|
||||
return None
|
||||
|
||||
def GetLCID(self):
|
||||
'''Returns the archive Locale ID'''
|
||||
if self.lcid in locale_table:
|
||||
return locale_table[self.lcid]
|
||||
else:
|
||||
return None
|
||||
|
||||
def GetDWORD(self, buff, idx=0):
|
||||
'''Internal method.
|
||||
Reads a double word (4 bytes) from a buffer.
|
||||
'''
|
||||
result = buff[idx] + (buff[idx+1]<<8) + (buff[idx+2]<<16) + \
|
||||
(buff[idx+3]<<24)
|
||||
|
||||
if result == 0xFFFFFFFF:
|
||||
result = 0
|
||||
|
||||
return result
|
||||
|
||||
def GetString(self, text, idx):
|
||||
'''Internal method.
|
||||
Retrieves a string from the #STRINGS buffer.
|
||||
'''
|
||||
next = string.find(text, '\x00', idx)
|
||||
chunk = text[idx:next]
|
||||
return chunk
|
||||
|
||||
def GetWindowsInfo(self):
|
||||
'''Gets information from the #WINDOWS file.
|
||||
Checks the #WINDOWS file to see if it has any info that was
|
||||
not found in #SYSTEM (topics, index or default page.
|
||||
'''
|
||||
result, ui = chmlib.chm_resolve_object(self.file, '/#WINDOWS')
|
||||
if (result != chmlib.CHM_RESOLVE_SUCCESS):
|
||||
return -1
|
||||
|
||||
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, 8)
|
||||
if (size < 8):
|
||||
return -2
|
||||
|
||||
buff = array.array('B', text)
|
||||
num_entries = self.GetDWORD(buff, 0)
|
||||
entry_size = self.GetDWORD(buff, 4)
|
||||
|
||||
if num_entries < 1:
|
||||
return -3
|
||||
|
||||
size, text = chmlib.chm_retrieve_object(self.file, ui, 8l, entry_size)
|
||||
if (size < entry_size):
|
||||
return -4
|
||||
|
||||
buff = array.array('B', text)
|
||||
toc_index = self.GetDWORD(buff, 0x60)
|
||||
idx_index = self.GetDWORD(buff, 0x64)
|
||||
dft_index = self.GetDWORD(buff, 0x68)
|
||||
|
||||
result, ui = chmlib.chm_resolve_object(self.file, '/#STRINGS')
|
||||
if (result != chmlib.CHM_RESOLVE_SUCCESS):
|
||||
return -5
|
||||
|
||||
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
|
||||
if (size == 0):
|
||||
return -6
|
||||
|
||||
if (not self.topics):
|
||||
self.topics = self.GetString(text, toc_index)
|
||||
if not self.topics.startswith("/"):
|
||||
self.topics = "/" + self.topics
|
||||
|
||||
if (not self.index):
|
||||
self.index = self.GetString(text, idx_index)
|
||||
if not self.index.startswith("/"):
|
||||
self.index = "/" + self.index
|
||||
|
||||
if (dft_index != 0):
|
||||
self.home = self.GetString(text, dft_index)
|
||||
if not self.home.startswith("/"):
|
||||
self.home = "/" + self.home
|
100
src/calibre/utils/chm/chmlib.py
Normal file
@ -0,0 +1,100 @@
|
||||
# This file was created automatically by SWIG.
|
||||
# Don't modify this file, modify the SWIG interface instead.
|
||||
# This file is compatible with both classic and new-style classes.
|
||||
|
||||
from calibre.constants import plugins
|
||||
|
||||
_chmlib, chmlib_err = plugins['chmlib']
|
||||
|
||||
if chmlib_err:
|
||||
raise RuntimeError('Failed to load chmlib: '+chmlib_err)
|
||||
|
||||
def _swig_setattr(self,class_type,name,value):
|
||||
if (name == "this"):
|
||||
if isinstance(value, class_type):
|
||||
self.__dict__[name] = value.this
|
||||
            if hasattr(value,"thisown"): self.__dict__["thisown"] = value.thisown
            del value.thisown
            return
    method = class_type.__swig_setmethods__.get(name,None)
    if method: return method(self,value)
    self.__dict__[name] = value

def _swig_getattr(self,class_type,name):
    method = class_type.__swig_getmethods__.get(name,None)
    if method: return method(self)
    raise AttributeError,name

import types
try:
    _object = types.ObjectType
    _newclass = 1
except AttributeError:
    class _object : pass
    _newclass = 0


CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
class chmUnitInfo(_object):
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)
    __swig_setmethods__["start"] = _chmlib.chmUnitInfo_start_set
    __swig_getmethods__["start"] = _chmlib.chmUnitInfo_start_get
    if _newclass:start = property(_chmlib.chmUnitInfo_start_get,_chmlib.chmUnitInfo_start_set)
    __swig_setmethods__["length"] = _chmlib.chmUnitInfo_length_set
    __swig_getmethods__["length"] = _chmlib.chmUnitInfo_length_get
    if _newclass:length = property(_chmlib.chmUnitInfo_length_get,_chmlib.chmUnitInfo_length_set)
    __swig_setmethods__["space"] = _chmlib.chmUnitInfo_space_set
    __swig_getmethods__["space"] = _chmlib.chmUnitInfo_space_get
    if _newclass:space = property(_chmlib.chmUnitInfo_space_get,_chmlib.chmUnitInfo_space_set)
    __swig_setmethods__["path"] = _chmlib.chmUnitInfo_path_set
    __swig_getmethods__["path"] = _chmlib.chmUnitInfo_path_get
    if _newclass:path = property(_chmlib.chmUnitInfo_path_get,_chmlib.chmUnitInfo_path_set)
    def __init__(self,*args):
        _swig_setattr(self, chmUnitInfo, 'this', apply(_chmlib.new_chmUnitInfo,args))
        _swig_setattr(self, chmUnitInfo, 'thisown', 1)
    def __del__(self, destroy= _chmlib.delete_chmUnitInfo):
        try:
            if self.thisown: destroy(self)
        except: pass
    def __repr__(self):
        return "<C chmUnitInfo instance at %s>" % (self.this,)

class chmUnitInfoPtr(chmUnitInfo):
    def __init__(self,this):
        _swig_setattr(self, chmUnitInfo, 'this', this)
        if not hasattr(self,"thisown"): _swig_setattr(self, chmUnitInfo, 'thisown', 0)
        _swig_setattr(self, chmUnitInfo,self.__class__,chmUnitInfo)
_chmlib.chmUnitInfo_swigregister(chmUnitInfoPtr)

chm_open = _chmlib.chm_open

chm_close = _chmlib.chm_close

CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
chm_set_param = _chmlib.chm_set_param

CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
chm_resolve_object = _chmlib.chm_resolve_object

chm_retrieve_object = _chmlib.chm_retrieve_object

CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
chm_enumerate = _chmlib.chm_enumerate

chm_enumerate_dir = _chmlib.chm_enumerate_dir
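A minimal usage sketch of the low-level wrapper above (not part of the commit). It assumes the module is importable as calibre.utils.chm.chmlib, and the archive path and entry name below are hypothetical; the tuple-style return values follow the OutValue/OUTPUT typemaps declared in swig_chm.i further down.

from calibre.utils.chm import chmlib   # assumed import location

f = chmlib.chm_open('/tmp/example.chm')                   # hypothetical path
status, ui = chmlib.chm_resolve_object(f, '/index.html')  # hypothetical entry
if status == chmlib.CHM_RESOLVE_SUCCESS:
    # returns (bytes_read, data) per the argout typemap on unsigned char *OUTPUT
    size, data = chmlib.chm_retrieve_object(f, ui, 0, ui.length)
    print 'read %d bytes from %s' % (size, ui.path)
chmlib.chm_close(f)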
759
src/calibre/utils/chm/extra.c
Normal file
@ -0,0 +1,759 @@
|
||||
/*
|
||||
* extra.c - full-text search support for pychm
|
||||
*
|
||||
* Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Author: Rubens Ramos <rubensr@users.sourceforge.net>
|
||||
*
|
||||
* Heavily based on work done by:
|
||||
* Pabs <pabs@zip.to> - chmdeco
|
||||
* Razvan Cojocaru <razvanco@gmx.net> - xCHM
|
||||
*
|
||||
*/
|
||||
|
||||
#include "chm_lib.h"
|
||||
#ifdef __PYTHON__
|
||||
#include "Python.h"
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#define PyObject void
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include "stdint.h"
|
||||
#define strcasecmp stricmp
|
||||
#define strncasecmp strnicmp
|
||||
#else
|
||||
#include <inttypes.h>
|
||||
#include <strings.h>
|
||||
#endif
|
||||
|
||||
#if defined( _MSC_VER ) && !defined( __cplusplus )
|
||||
# define inline __inline
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(__WIN32__)
|
||||
# if defined(_MSC_VER)
|
||||
# if defined(STATIC_LINKED)
|
||||
# define MODEXPORT(a) a
|
||||
# define MODIMPORT(a) extern a
|
||||
# else
|
||||
# define MODEXPORT(a) __declspec(dllexport) a
|
||||
# define MODIMPORT(a) extern a
|
||||
# endif
|
||||
# else
|
||||
# if defined(__BORLANDC__)
|
||||
# define MODEXPORT(a) a _export
|
||||
# define MODIMPORT(a) a _export
|
||||
# else
|
||||
# define MODEXPORT(a) a
|
||||
# define MODIMPORT(a) a
|
||||
# endif
|
||||
# endif
|
||||
#else
|
||||
# define MODEXPORT(a) a
|
||||
# define MODIMPORT(a) a
|
||||
#endif
|
||||
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
#define FTS_HEADER_LEN 0x32
|
||||
#define TOPICS_ENTRY_LEN 16
|
||||
#define COMMON_BUF_LEN 1025
|
||||
|
||||
#define FREE(x) free (x); x = NULL
|
||||
|
||||
inline uint16_t
|
||||
get_uint16 (uint8_t* b) {
|
||||
return b[0] |
|
||||
b[1]<<8;
|
||||
}
|
||||
|
||||
inline uint32_t
|
||||
get_uint32 (uint8_t* b) {
|
||||
return b[0] |
|
||||
b[1]<<8 |
|
||||
b[2]<<16 |
|
||||
b[3]<<24;
|
||||
}
|
||||
|
||||
inline uint64_t
|
||||
get_uint64 (uint8_t* b) {
|
||||
return b[0] |
|
||||
b[1]<<8 |
|
||||
b[2]<<16 |
|
||||
b[3]<<24 |
|
||||
(uint64_t) b[4]<<32 |
|
||||
(uint64_t) b[5]<<40 |
|
||||
(uint64_t) b[6]<<48 |
|
||||
(uint64_t) b[7]<<56;
|
||||
}
|
||||
|
||||
inline uint64_t
|
||||
be_encint (unsigned char *buffer, size_t *length)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int shift=0;
|
||||
*length = 0;
|
||||
|
||||
do {
|
||||
result |= ((*buffer) & 0x7f) << shift;
|
||||
shift += 7;
|
||||
*length = *length + 1;
|
||||
|
||||
} while (*(buffer++) & 0x80);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
Finds the first unset bit in memory. Returns the number of set bits found.
|
||||
Returns -1 if the buffer runs out before we find an unset bit.
|
||||
*/
|
||||
inline int
|
||||
ffus (unsigned char* byte, int* bit, size_t *length) {
|
||||
int bits = 0;
|
||||
*length = 0;
|
||||
|
||||
while(*byte & (1 << *bit)){
|
||||
if(*bit)
|
||||
--(*bit);
|
||||
else {
|
||||
++byte;
|
||||
++(*length);
|
||||
*bit = 7;
|
||||
}
|
||||
++bits;
|
||||
}
|
||||
|
||||
if(*bit)
|
||||
--(*bit);
|
||||
else {
|
||||
++(*length);
|
||||
*bit = 7;
|
||||
}
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
|
||||
inline uint64_t
|
||||
sr_int(unsigned char* byte, int* bit,
|
||||
unsigned char s, unsigned char r, size_t *length)
|
||||
{
|
||||
uint64_t ret;
|
||||
unsigned char mask;
|
||||
int n, n_bits, num_bits, base, count;
|
||||
size_t fflen;
|
||||
|
||||
*length = 0;
|
||||
|
||||
if(!bit || *bit > 7 || s != 2)
|
||||
return ~(uint64_t)0;
|
||||
ret = 0;
|
||||
|
||||
count = ffus(byte, bit, &fflen);
|
||||
*length += fflen;
|
||||
byte += *length;
|
||||
|
||||
n_bits = n = r + (count ? count-1 : 0) ;
|
||||
|
||||
while (n > 0) {
|
||||
num_bits = n > *bit ? *bit : n-1;
|
||||
base = n > *bit ? 0 : *bit - (n-1);
|
||||
|
||||
switch (num_bits){
|
||||
case 0:
|
||||
mask = 1;
|
||||
break;
|
||||
case 1:
|
||||
mask = 3;
|
||||
break;
|
||||
case 2:
|
||||
mask = 7;
|
||||
break;
|
||||
case 3:
|
||||
mask = 0xf;
|
||||
break;
|
||||
case 4:
|
||||
mask = 0x1f;
|
||||
break;
|
||||
case 5:
|
||||
mask = 0x3f;
|
||||
break;
|
||||
case 6:
|
||||
mask = 0x7f;
|
||||
break;
|
||||
case 7:
|
||||
mask = 0xff;
|
||||
break;
|
||||
default:
|
||||
mask = 0xff;
|
||||
break;
|
||||
}
|
||||
|
||||
mask <<= base;
|
||||
ret = (ret << (num_bits+1)) |
|
||||
(uint64_t)((*byte & mask) >> base);
|
||||
|
||||
if( n > *bit ){
|
||||
++byte;
|
||||
++(*length);
|
||||
n -= *bit+1;
|
||||
*bit = 7;
|
||||
} else {
|
||||
*bit -= n;
|
||||
n = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(count)
|
||||
ret |= (uint64_t)1 << n_bits;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
inline uint32_t
|
||||
get_leaf_node_offset(struct chmFile *chmfile,
|
||||
const char *text,
|
||||
uint32_t initial_offset,
|
||||
uint32_t buff_size,
|
||||
uint16_t tree_depth,
|
||||
struct chmUnitInfo *ui)
|
||||
{
|
||||
unsigned char word_len;
|
||||
unsigned char pos;
|
||||
uint16_t free_space;
|
||||
char *wrd_buf;
|
||||
char *word = NULL;
|
||||
uint32_t test_offset = 0;
|
||||
uint32_t i = sizeof(uint16_t);
|
||||
unsigned char *buffer = (unsigned char *)malloc (buff_size);
|
||||
|
||||
if (NULL == buffer)
|
||||
return 0;
|
||||
|
||||
while (--tree_depth) {
|
||||
if (initial_offset == test_offset) {
|
||||
FREE(buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
test_offset = initial_offset;
|
||||
if (chm_retrieve_object (chmfile, ui, buffer,
|
||||
initial_offset, buff_size) == 0) {
|
||||
FREE(buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
free_space = get_uint16 (buffer);
|
||||
|
||||
while (i < buff_size - free_space) {
|
||||
|
||||
word_len = *(buffer + i);
|
||||
pos = *(buffer + i + 1);
|
||||
|
||||
wrd_buf = (char*)malloc (word_len);
|
||||
memcpy (wrd_buf, buffer + i + 2, word_len - 1);
|
||||
wrd_buf[word_len - 1] = 0;
|
||||
|
||||
if (pos == 0) {
|
||||
FREE (word);
|
||||
word = (char *) strdup (wrd_buf);
|
||||
} else {
|
||||
word = (char*)realloc (word, word_len + pos + 1);
|
||||
strcpy (word + pos, wrd_buf);
|
||||
}
|
||||
|
||||
FREE(wrd_buf);
|
||||
|
||||
if (strcasecmp (text, word) <= 0) {
|
||||
initial_offset = get_uint32 (buffer + i + word_len + 1);
|
||||
break;
|
||||
}
|
||||
|
||||
i += word_len + sizeof (unsigned char) + sizeof(uint32_t) +
|
||||
sizeof(uint16_t);
|
||||
}
|
||||
}
|
||||
|
||||
if(initial_offset == test_offset)
|
||||
initial_offset = 0;
|
||||
|
||||
FREE(word);
|
||||
FREE(buffer);
|
||||
|
||||
return initial_offset;
|
||||
}
|
||||
|
||||
inline int
|
||||
pychm_process_wlc (struct chmFile *chmfile,
|
||||
uint64_t wlc_count, uint64_t wlc_size,
|
||||
uint32_t wlc_offset, unsigned char ds,
|
||||
unsigned char dr, unsigned char cs,
|
||||
unsigned char cr, unsigned char ls,
|
||||
unsigned char lr, struct chmUnitInfo *uimain,
|
||||
struct chmUnitInfo* uitbl,
|
||||
struct chmUnitInfo *uistrings,
|
||||
struct chmUnitInfo* topics,
|
||||
struct chmUnitInfo *urlstr,
|
||||
PyObject *dict)
|
||||
{
|
||||
uint32_t stroff, urloff;
|
||||
uint64_t i, j, count;
|
||||
size_t length;
|
||||
int wlc_bit = 7;
|
||||
size_t off = 0;
|
||||
uint64_t index = 0;
|
||||
unsigned char entry[TOPICS_ENTRY_LEN];
|
||||
unsigned char combuf[COMMON_BUF_LEN];
|
||||
unsigned char *buffer = (unsigned char *)malloc (wlc_size);
|
||||
char *url = NULL;
|
||||
char *topic = NULL;
|
||||
|
||||
if (chm_retrieve_object(chmfile, uimain, buffer,
|
||||
wlc_offset, wlc_size) == 0) {
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i = 0; i < wlc_count; ++i) {
|
||||
|
||||
if(wlc_bit != 7) {
|
||||
++off;
|
||||
wlc_bit = 7;
|
||||
}
|
||||
|
||||
index += sr_int(buffer + off, &wlc_bit, ds, dr, &length);
|
||||
off += length;
|
||||
|
||||
if(chm_retrieve_object(chmfile, topics, entry,
|
||||
index * 16, TOPICS_ENTRY_LEN) == 0) {
|
||||
FREE(topic);
|
||||
FREE(url);
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||
stroff = get_uint32 (entry + 4);
|
||||
|
||||
FREE (topic);
|
||||
if (chm_retrieve_object (chmfile, uistrings, combuf,
|
||||
stroff, COMMON_BUF_LEN - 1) == 0) {
|
||||
topic = strdup ("Untitled in index");
|
||||
|
||||
} else {
|
||||
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||
|
||||
topic = strdup ((char*)combuf);
|
||||
}
|
||||
|
||||
urloff = get_uint32 (entry + 8);
|
||||
|
||||
if(chm_retrieve_object (chmfile, uitbl, combuf,
|
||||
urloff, 12) == 0) {
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
urloff = get_uint32 (combuf + 8);
|
||||
|
||||
if (chm_retrieve_object (chmfile, urlstr, combuf,
|
||||
urloff + 8, COMMON_BUF_LEN - 1) == 0) {
|
||||
FREE(topic);
|
||||
FREE(url);
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||
|
||||
FREE (url);
|
||||
url = strdup ((char*)combuf);
|
||||
|
||||
if (url && topic) {
|
||||
#ifdef __PYTHON__
|
||||
PyDict_SetItemString (dict, topic,
|
||||
PyString_FromString (url));
|
||||
#else
|
||||
printf ("%s ==> %s\n", url, topic);
|
||||
#endif
|
||||
}
|
||||
|
||||
count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
|
||||
off += length;
|
||||
|
||||
for (j = 0; j < count; ++j) {
|
||||
sr_int (buffer + off, &wlc_bit, ls, lr, &length);
|
||||
off += length;
|
||||
}
|
||||
}
|
||||
|
||||
FREE(topic);
|
||||
FREE(url);
|
||||
FREE(buffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
chm_search (struct chmFile *chmfile,
|
||||
const char *text, int whole_words,
|
||||
int titles_only, PyObject *dict)
|
||||
{
|
||||
unsigned char header[FTS_HEADER_LEN];
|
||||
unsigned char doc_index_s;
|
||||
unsigned char doc_index_r;
|
||||
unsigned char code_count_s;
|
||||
unsigned char code_count_r;
|
||||
unsigned char loc_codes_s;
|
||||
unsigned char loc_codes_r;
|
||||
unsigned char word_len, pos;
|
||||
unsigned char *buffer;
|
||||
char *word = NULL;
|
||||
uint32_t node_offset;
|
||||
uint32_t node_len;
|
||||
uint16_t tree_depth;
|
||||
uint32_t i;
|
||||
uint16_t free_space;
|
||||
uint64_t wlc_count, wlc_size;
|
||||
uint32_t wlc_offset;
|
||||
char *wrd_buf;
|
||||
unsigned char title;
|
||||
size_t encsz;
|
||||
struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
|
||||
int partial = false;
|
||||
|
||||
if (NULL == text)
|
||||
return -1;
|
||||
|
||||
if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
|
||||
CHM_RESOLVE_SUCCESS)
|
||||
return false;
|
||||
|
||||
if(chm_retrieve_object(chmfile, &ui, header, 0, FTS_HEADER_LEN) == 0)
|
||||
return false;
|
||||
|
||||
doc_index_s = header[0x1E];
|
||||
doc_index_r = header[0x1F];
|
||||
code_count_s = header[0x20];
|
||||
code_count_r = header[0x21];
|
||||
loc_codes_s = header[0x22];
|
||||
loc_codes_r = header[0x23];
|
||||
|
||||
if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
node_offset = get_uint32 (header + 0x14);
|
||||
node_len = get_uint32 (header + 0x2e);
|
||||
tree_depth = get_uint16 (header + 0x18);
|
||||
|
||||
i = sizeof(uint16_t);
|
||||
|
||||
buffer = (unsigned char*)malloc (node_len);
|
||||
|
||||
node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
|
||||
tree_depth, &ui);
|
||||
|
||||
if (!node_offset) {
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
do {
|
||||
|
||||
if (chm_retrieve_object (chmfile, &ui, buffer,
|
||||
node_offset, node_len) == 0) {
|
||||
FREE(word);
|
||||
FREE(buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
free_space = get_uint16 (buffer + 6);
|
||||
|
||||
i = sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
|
||||
|
||||
encsz = 0;
|
||||
|
||||
while (i < node_len - free_space) {
|
||||
word_len = *(buffer + i);
|
||||
pos = *(buffer + i + 1);
|
||||
|
||||
wrd_buf = (char*)malloc (word_len);
|
||||
memcpy (wrd_buf, buffer + i + 2, word_len - 1);
|
||||
wrd_buf[word_len - 1] = 0;
|
||||
|
||||
if (pos == 0) {
|
||||
FREE(word);
|
||||
word = (char *) strdup (wrd_buf);
|
||||
} else {
|
||||
word = (char*)realloc (word, word_len + pos + 1);
|
||||
strcpy (word + pos, wrd_buf);
|
||||
}
|
||||
|
||||
FREE(wrd_buf);
|
||||
|
||||
i += 2 + word_len;
|
||||
title = *(buffer + i - 1);
|
||||
|
||||
wlc_count = be_encint (buffer + i, &encsz);
|
||||
i += encsz;
|
||||
|
||||
wlc_offset = get_uint32 (buffer + i);
|
||||
|
||||
i += sizeof(uint32_t) + sizeof(uint16_t);
|
||||
wlc_size = be_encint (buffer + i, &encsz);
|
||||
i += encsz;
|
||||
|
||||
node_offset = get_uint32 (buffer);
|
||||
|
||||
if (!title && titles_only)
|
||||
continue;
|
||||
|
||||
if (whole_words && !strcasecmp(text, word)) {
|
||||
partial = pychm_process_wlc (chmfile, wlc_count, wlc_size,
|
||||
wlc_offset, doc_index_s,
|
||||
doc_index_r,code_count_s,
|
||||
code_count_r, loc_codes_s,
|
||||
loc_codes_r, &ui, &uiurltbl,
|
||||
&uistrings, &uitopics,
|
||||
&uiurlstr, dict);
|
||||
FREE(word);
|
||||
FREE(buffer);
|
||||
return partial;
|
||||
}
|
||||
|
||||
if (!whole_words) {
|
||||
if (!strncasecmp (word, text, strlen(text))) {
|
||||
partial = true;
|
||||
pychm_process_wlc (chmfile, wlc_count, wlc_size,
|
||||
wlc_offset, doc_index_s,
|
||||
doc_index_r,code_count_s,
|
||||
code_count_r, loc_codes_s,
|
||||
loc_codes_r, &ui, &uiurltbl,
|
||||
&uistrings, &uitopics,
|
||||
&uiurlstr, dict);
|
||||
|
||||
} else if (strncasecmp (text, word, strlen(text)) < -1)
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
} while (!whole_words &&
|
||||
!strncmp (word, text, strlen(text)) &&
|
||||
node_offset);
|
||||
|
||||
FREE(word);
|
||||
FREE(buffer);
|
||||
|
||||
return partial;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const char *file;
|
||||
int offset;
|
||||
} Langrec;
|
||||
|
||||
Langrec lang_files[] = {
|
||||
{"/$FIftiMain", 0x7E},
|
||||
{"$WWKeywordLinks/BTree", 0x34},
|
||||
{"$WWAssociativeLinks/BTree", 0x34}
|
||||
};
|
||||
|
||||
#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(Langrec))
|
||||
|
||||
int
|
||||
chm_get_lcid (struct chmFile *chmfile) {
|
||||
struct chmUnitInfo ui;
|
||||
uint32_t lang;
|
||||
int i;
|
||||
|
||||
for (i=0; i<LANG_FILES_SIZE; i++) {
|
||||
|
||||
if (chm_resolve_object (chmfile, lang_files[i].file, &ui) ==
|
||||
CHM_RESOLVE_SUCCESS) {
|
||||
|
||||
if (chm_retrieve_object (chmfile, &ui, (unsigned char *) &lang,
|
||||
lang_files[i].offset, sizeof(uint32_t)) != 0)
|
||||
return lang;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef __PYTHON__
|
||||
|
||||
static PyObject *
|
||||
is_searchable (PyObject *self, PyObject *args) {
|
||||
struct chmFile *file;
|
||||
PyObject *obj0;
|
||||
struct chmUnitInfo ui;
|
||||
|
||||
if (PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
|
||||
|
||||
file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
|
||||
|
||||
if (chm_resolve_object (file, "/$FIftiMain", &ui) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (file, "/#TOPICS", &ui) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (file, "/#STRINGS", &ui) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (file, "/#URLTBL", &ui) !=
|
||||
CHM_RESOLVE_SUCCESS ||
|
||||
chm_resolve_object (file, "/#URLSTR", &ui) !=
|
||||
CHM_RESOLVE_SUCCESS)
|
||||
return Py_BuildValue ("i", 0);
|
||||
else
|
||||
return Py_BuildValue ("i", 1);
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
search (PyObject *self, PyObject *args) {
|
||||
char *text;
|
||||
int whole_words;
|
||||
int titles_only;
|
||||
int partial;
|
||||
struct chmFile *file;
|
||||
PyObject *obj0;
|
||||
PyObject *dict;
|
||||
|
||||
if (PyArg_ParseTuple (args, "Osii:search", &obj0, &text,
|
||||
&whole_words, &titles_only)) {
|
||||
|
||||
dict = PyDict_New();
|
||||
|
||||
if (dict) {
|
||||
file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
|
||||
|
||||
partial = chm_search (file,
|
||||
text, whole_words, titles_only, dict);
|
||||
|
||||
return Py_BuildValue ("(iO)", partial, dict);
|
||||
|
||||
} else {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"Expected chmfile (not CHMFile!), string, int, int");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
get_lcid (PyObject *self, PyObject *args) {
|
||||
int code;
|
||||
struct chmFile *file;
|
||||
PyObject *obj0;
|
||||
|
||||
if (PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
|
||||
|
||||
file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
|
||||
|
||||
code = chm_get_lcid (file);
|
||||
|
||||
if (code != -1)
|
||||
return Py_BuildValue ("i", code);
|
||||
else
|
||||
Py_INCREF(Py_None);
|
||||
return Py_None;
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static PyMethodDef
|
||||
IndexMethods[] = {
|
||||
{"get_lcid", get_lcid, METH_VARARGS,
|
||||
"Returns LCID (Locale ID) for archive."},
|
||||
{"search", search, METH_VARARGS,
|
||||
"Perform Full-Text search."},
|
||||
{"is_searchable", is_searchable, METH_VARARGS,
|
||||
"Return 1 if it is possible to search the archive, 0 otherwise."},
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
MODEXPORT(void)
|
||||
initchm_extra (void) {
|
||||
Py_InitModule ("chm_extra", IndexMethods);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int
|
||||
main (int argc, char **argv) {
|
||||
struct chmFile *file;
|
||||
char text[255];
|
||||
int whole_words, titles_only;
|
||||
int partial;
|
||||
|
||||
if (argc == 2) {
|
||||
file = chm_open (argv[1]);
|
||||
|
||||
if (file) {
|
||||
printf ("\nLCID= %d (%08X)\n", chm_get_lcid(file), chm_get_lcid(file));
|
||||
while (1) {
|
||||
printf ("\n<whole_words> <titles_only> <string>\n");
|
||||
printf ("> ");
|
||||
if (scanf ("%d %d %s", &whole_words, &titles_only, text))
|
||||
partial = chm_search (file,
|
||||
text, whole_words, titles_only, NULL);
|
||||
else
|
||||
break;
|
||||
|
||||
printf ("Partial = %d\n", partial);
|
||||
}
|
||||
|
||||
chm_close (file);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -1;
|
||||
|
||||
} else {
|
||||
printf ("\n%s <filename>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
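For reference, a hedged Python transcription (not in the commit) of the variable-length integer decoder used by the full-text-search code above. It mirrors be_encint() from extra.c: seven payload bits per byte, least-significant group first, with the high bit of each byte as a continuation flag.

def be_encint(buf, offset=0):
    # Mirror of be_encint() in extra.c; buf is a plain byte string.
    # Returns (decoded_value, number_of_bytes_consumed).
    result, shift, length = 0, 0, 0
    while True:
        b = ord(buf[offset + length])
        result |= (b & 0x7f) << shift
        shift += 7
        length += 1
        if not (b & 0x80):
            return result, length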
247
src/calibre/utils/chm/stdint.h
Normal file
@ -0,0 +1,247 @@
|
||||
// ISO C9x compliant stdint.h for Microsoft Visual Studio
|
||||
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
|
||||
//
|
||||
// Copyright (c) 2006-2008 Alexander Chemeris
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. The name of the author may be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _MSC_VER // [
|
||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||
#endif // _MSC_VER ]
|
||||
|
||||
#ifndef _MSC_STDINT_H_ // [
|
||||
#define _MSC_STDINT_H_
|
||||
|
||||
#if _MSC_VER > 1000
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
|
||||
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
|
||||
// or compiler give many errors like this:
|
||||
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
# include <wchar.h>
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
// Define _W64 macros to mark types changing their size, like intptr_t.
|
||||
#ifndef _W64
|
||||
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
|
||||
# define _W64 __w64
|
||||
# else
|
||||
# define _W64
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
// 7.18.1 Integer types
|
||||
|
||||
// 7.18.1.1 Exact-width integer types
|
||||
|
||||
// Visual Studio 6 and Embedded Visual C++ 4 doesn't
|
||||
// realize that, e.g. char has the same size as __int8
|
||||
// so we give up on __intX for them.
|
||||
#if (_MSC_VER < 1300)
|
||||
typedef signed char int8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
#else
|
||||
typedef signed __int8 int8_t;
|
||||
typedef signed __int16 int16_t;
|
||||
typedef signed __int32 int32_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
#endif
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
|
||||
|
||||
// 7.18.1.2 Minimum-width integer types
|
||||
typedef int8_t int_least8_t;
|
||||
typedef int16_t int_least16_t;
|
||||
typedef int32_t int_least32_t;
|
||||
typedef int64_t int_least64_t;
|
||||
typedef uint8_t uint_least8_t;
|
||||
typedef uint16_t uint_least16_t;
|
||||
typedef uint32_t uint_least32_t;
|
||||
typedef uint64_t uint_least64_t;
|
||||
|
||||
// 7.18.1.3 Fastest minimum-width integer types
|
||||
typedef int8_t int_fast8_t;
|
||||
typedef int16_t int_fast16_t;
|
||||
typedef int32_t int_fast32_t;
|
||||
typedef int64_t int_fast64_t;
|
||||
typedef uint8_t uint_fast8_t;
|
||||
typedef uint16_t uint_fast16_t;
|
||||
typedef uint32_t uint_fast32_t;
|
||||
typedef uint64_t uint_fast64_t;
|
||||
|
||||
// 7.18.1.4 Integer types capable of holding object pointers
|
||||
#ifdef _WIN64 // [
|
||||
typedef signed __int64 intptr_t;
|
||||
typedef unsigned __int64 uintptr_t;
|
||||
#else // _WIN64 ][
|
||||
typedef _W64 signed int intptr_t;
|
||||
typedef _W64 unsigned int uintptr_t;
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// 7.18.1.5 Greatest-width integer types
|
||||
typedef int64_t intmax_t;
|
||||
typedef uint64_t uintmax_t;
|
||||
|
||||
|
||||
// 7.18.2 Limits of specified-width integer types
|
||||
|
||||
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
|
||||
|
||||
// 7.18.2.1 Limits of exact-width integer types
|
||||
#define INT8_MIN ((int8_t)_I8_MIN)
|
||||
#define INT8_MAX _I8_MAX
|
||||
#define INT16_MIN ((int16_t)_I16_MIN)
|
||||
#define INT16_MAX _I16_MAX
|
||||
#define INT32_MIN ((int32_t)_I32_MIN)
|
||||
#define INT32_MAX _I32_MAX
|
||||
#define INT64_MIN ((int64_t)_I64_MIN)
|
||||
#define INT64_MAX _I64_MAX
|
||||
#define UINT8_MAX _UI8_MAX
|
||||
#define UINT16_MAX _UI16_MAX
|
||||
#define UINT32_MAX _UI32_MAX
|
||||
#define UINT64_MAX _UI64_MAX
|
||||
|
||||
// 7.18.2.2 Limits of minimum-width integer types
|
||||
#define INT_LEAST8_MIN INT8_MIN
|
||||
#define INT_LEAST8_MAX INT8_MAX
|
||||
#define INT_LEAST16_MIN INT16_MIN
|
||||
#define INT_LEAST16_MAX INT16_MAX
|
||||
#define INT_LEAST32_MIN INT32_MIN
|
||||
#define INT_LEAST32_MAX INT32_MAX
|
||||
#define INT_LEAST64_MIN INT64_MIN
|
||||
#define INT_LEAST64_MAX INT64_MAX
|
||||
#define UINT_LEAST8_MAX UINT8_MAX
|
||||
#define UINT_LEAST16_MAX UINT16_MAX
|
||||
#define UINT_LEAST32_MAX UINT32_MAX
|
||||
#define UINT_LEAST64_MAX UINT64_MAX
|
||||
|
||||
// 7.18.2.3 Limits of fastest minimum-width integer types
|
||||
#define INT_FAST8_MIN INT8_MIN
|
||||
#define INT_FAST8_MAX INT8_MAX
|
||||
#define INT_FAST16_MIN INT16_MIN
|
||||
#define INT_FAST16_MAX INT16_MAX
|
||||
#define INT_FAST32_MIN INT32_MIN
|
||||
#define INT_FAST32_MAX INT32_MAX
|
||||
#define INT_FAST64_MIN INT64_MIN
|
||||
#define INT_FAST64_MAX INT64_MAX
|
||||
#define UINT_FAST8_MAX UINT8_MAX
|
||||
#define UINT_FAST16_MAX UINT16_MAX
|
||||
#define UINT_FAST32_MAX UINT32_MAX
|
||||
#define UINT_FAST64_MAX UINT64_MAX
|
||||
|
||||
// 7.18.2.4 Limits of integer types capable of holding object pointers
|
||||
#ifdef _WIN64 // [
|
||||
# define INTPTR_MIN INT64_MIN
|
||||
# define INTPTR_MAX INT64_MAX
|
||||
# define UINTPTR_MAX UINT64_MAX
|
||||
#else // _WIN64 ][
|
||||
# define INTPTR_MIN INT32_MIN
|
||||
# define INTPTR_MAX INT32_MAX
|
||||
# define UINTPTR_MAX UINT32_MAX
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// 7.18.2.5 Limits of greatest-width integer types
|
||||
#define INTMAX_MIN INT64_MIN
|
||||
#define INTMAX_MAX INT64_MAX
|
||||
#define UINTMAX_MAX UINT64_MAX
|
||||
|
||||
// 7.18.3 Limits of other integer types
|
||||
|
||||
#ifdef _WIN64 // [
|
||||
# define PTRDIFF_MIN _I64_MIN
|
||||
# define PTRDIFF_MAX _I64_MAX
|
||||
#else // _WIN64 ][
|
||||
# define PTRDIFF_MIN _I32_MIN
|
||||
# define PTRDIFF_MAX _I32_MAX
|
||||
#endif // _WIN64 ]
|
||||
|
||||
#define SIG_ATOMIC_MIN INT_MIN
|
||||
#define SIG_ATOMIC_MAX INT_MAX
|
||||
|
||||
#ifndef SIZE_MAX // [
|
||||
# ifdef _WIN64 // [
|
||||
# define SIZE_MAX _UI64_MAX
|
||||
# else // _WIN64 ][
|
||||
# define SIZE_MAX _UI32_MAX
|
||||
# endif // _WIN64 ]
|
||||
#endif // SIZE_MAX ]
|
||||
|
||||
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
|
||||
#ifndef WCHAR_MIN // [
|
||||
# define WCHAR_MIN 0
|
||||
#endif // WCHAR_MIN ]
|
||||
#ifndef WCHAR_MAX // [
|
||||
# define WCHAR_MAX _UI16_MAX
|
||||
#endif // WCHAR_MAX ]
|
||||
|
||||
#define WINT_MIN 0
|
||||
#define WINT_MAX _UI16_MAX
|
||||
|
||||
#endif // __STDC_LIMIT_MACROS ]
|
||||
|
||||
|
||||
// 7.18.4 Limits of other integer types
|
||||
|
||||
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
|
||||
|
||||
// 7.18.4.1 Macros for minimum-width integer constants
|
||||
|
||||
#define INT8_C(val) val##i8
|
||||
#define INT16_C(val) val##i16
|
||||
#define INT32_C(val) val##i32
|
||||
#define INT64_C(val) val##i64
|
||||
|
||||
#define UINT8_C(val) val##ui8
|
||||
#define UINT16_C(val) val##ui16
|
||||
#define UINT32_C(val) val##ui32
|
||||
#define UINT64_C(val) val##ui64
|
||||
|
||||
// 7.18.4.2 Macros for greatest-width integer constants
|
||||
#define INTMAX_C INT64_C
|
||||
#define UINTMAX_C UINT64_C
|
||||
|
||||
#endif // __STDC_CONSTANT_MACROS ]
|
||||
|
||||
|
||||
#endif // _MSC_STDINT_H_ ]
|
1233
src/calibre/utils/chm/swig_chm.c
Normal file
File diff suppressed because it is too large
214
src/calibre/utils/chm/swig_chm.i
Normal file
@ -0,0 +1,214 @@
|
||||
%module chmlib
|
||||
%include "typemaps.i"
|
||||
%include "cstring.i"
|
||||
|
||||
%{
|
||||
/*
|
||||
Copyright (C) 2003 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||
|
||||
Based on code by:
|
||||
Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
|
||||
|
||||
pychm is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public
|
||||
License along with this program; see the file COPYING. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA
|
||||
|
||||
$Id: swig_chm.i,v 1.1.1.1 2003/12/02 12:38:14 rubensr Exp $
|
||||
*/
|
||||
#include "chm_lib.h"
|
||||
#include <stdio.h>
|
||||
|
||||
static PyObject *my_callback = NULL;
|
||||
|
||||
static PyObject *
|
||||
my_set_callback(PyObject *dummy, PyObject *arg)
|
||||
{
|
||||
PyObject *result = NULL;
|
||||
|
||||
if (!PyCallable_Check(arg)) {
|
||||
PyErr_SetString(PyExc_TypeError, "parameter must be callable");
|
||||
return NULL;
|
||||
}
|
||||
Py_XINCREF(arg); /* Add a reference to new callback */
|
||||
Py_XDECREF(my_callback); /* Dispose of previous callback */
|
||||
my_callback = arg; /* Remember new callback */
|
||||
/* Boilerplate to return "None" */
|
||||
Py_INCREF(Py_None);
|
||||
result = Py_None;
|
||||
return result;
|
||||
}
|
||||
|
||||
int dummy_enumerator (struct chmFile *h,
|
||||
struct chmUnitInfo *ui,
|
||||
void *context) {
|
||||
PyObject *arglist;
|
||||
PyObject *result;
|
||||
PyObject *py_h;
|
||||
PyObject *py_ui;
|
||||
PyObject *py_c;
|
||||
|
||||
py_h = SWIG_NewPointerObj((void *) h, SWIGTYPE_p_chmFile, 0);
|
||||
py_ui = SWIG_NewPointerObj((void *) ui, SWIGTYPE_p_chmUnitInfo, 0);
|
||||
py_c = PyCObject_AsVoidPtr(context);
|
||||
|
||||
/* Time to call the callback */
|
||||
arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
|
||||
if (arglist) {
|
||||
result = PyEval_CallObject(my_callback, arglist);
|
||||
Py_DECREF(arglist);
|
||||
Py_DECREF(result);
|
||||
|
||||
Py_DECREF(py_h);
|
||||
Py_DECREF(py_ui);
|
||||
|
||||
if (result == NULL) {
|
||||
return 0; /* Pass error back */
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
} else
|
||||
return 0;
|
||||
}
|
||||
%}
|
||||
|
||||
%typemap(in) CHM_ENUMERATOR {
|
||||
if (!my_set_callback(self, $input)) goto fail;
|
||||
$1 = dummy_enumerator;
|
||||
}
|
||||
|
||||
%typemap(in) void *context {
|
||||
if (!($1 = PyCObject_FromVoidPtr($input, NULL))) goto fail;
|
||||
}
|
||||
|
||||
%typemap(in, numinputs=0) struct chmUnitInfo *OutValue (struct chmUnitInfo *temp = (struct chmUnitInfo *) calloc(1, sizeof(struct chmUnitInfo))) {
|
||||
$1 = temp;
|
||||
}
|
||||
|
||||
%typemap(argout) struct chmUnitInfo *OutValue {
|
||||
PyObject *o, *o2, *o3;
|
||||
o = SWIG_NewPointerObj((void *) $1, SWIGTYPE_p_chmUnitInfo, 1);
|
||||
if ((!$result) || ($result == Py_None)) {
|
||||
$result = o;
|
||||
} else {
|
||||
if (!PyTuple_Check($result)) {
|
||||
PyObject *o2 = $result;
|
||||
$result = PyTuple_New(1);
|
||||
PyTuple_SetItem($result,0,o2);
|
||||
}
|
||||
o3 = PyTuple_New(1);
|
||||
PyTuple_SetItem(o3,0,o);
|
||||
o2 = $result;
|
||||
$result = PySequence_Concat(o2,o3);
|
||||
Py_DECREF(o2);
|
||||
Py_DECREF(o3);
|
||||
}
|
||||
}
|
||||
|
||||
%typemap(check) unsigned char *OUTPUT {
|
||||
/* nasty hack */
|
||||
#ifdef __cplusplus
|
||||
$1 = ($1_ltype) new char[arg5];
|
||||
#else
|
||||
$1 = ($1_ltype) malloc(arg5);
|
||||
#endif
|
||||
if ($1 == NULL) SWIG_fail;
|
||||
}
|
||||
|
||||
%typemap(argout,fragment="t_output_helper") unsigned char *OUTPUT {
|
||||
PyObject *o;
|
||||
o = PyString_FromStringAndSize($1, arg5);
|
||||
$result = t_output_helper($result,o);
|
||||
#ifdef __cplusplus
|
||||
delete [] $1;
|
||||
#else
|
||||
free($1);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
typedef unsigned __int64 LONGUINT64;
|
||||
typedef __int64 LONGINT64;
|
||||
#else
|
||||
typedef unsigned long long LONGUINT64;
|
||||
typedef long long LONGINT64;
|
||||
#endif
|
||||
|
||||
/* the two available spaces in a CHM file */
|
||||
/* N.B.: The format supports arbitrarily many spaces, but only */
|
||||
/* two appear to be used at present. */
|
||||
#define CHM_UNCOMPRESSED (0)
|
||||
#define CHM_COMPRESSED (1)
|
||||
|
||||
/* structure representing an ITS (CHM) file stream */
|
||||
struct chmFile;
|
||||
|
||||
/* structure representing an element from an ITS file stream */
|
||||
#define CHM_MAX_PATHLEN 256
|
||||
struct chmUnitInfo
|
||||
{
|
||||
LONGUINT64 start;
|
||||
LONGUINT64 length;
|
||||
int space;
|
||||
char path[CHM_MAX_PATHLEN+1];
|
||||
};
|
||||
|
||||
/* open an ITS archive */
|
||||
struct chmFile* chm_open(const char *filename);
|
||||
|
||||
/* close an ITS archive */
|
||||
void chm_close(struct chmFile *h);
|
||||
|
||||
/* methods for ssetting tuning parameters for particular file */
|
||||
#define CHM_PARAM_MAX_BLOCKS_CACHED 0
|
||||
void chm_set_param(struct chmFile *h,
|
||||
int paramType,
|
||||
int paramVal);
|
||||
|
||||
/* resolve a particular object from the archive */
|
||||
#define CHM_RESOLVE_SUCCESS (0)
|
||||
#define CHM_RESOLVE_FAILURE (1)
|
||||
int chm_resolve_object(struct chmFile *h,
|
||||
const char *objPath,
|
||||
struct chmUnitInfo *OutValue);
|
||||
|
||||
/* retrieve part of an object from the archive */
|
||||
LONGINT64 chm_retrieve_object(struct chmFile *h,
|
||||
struct chmUnitInfo *ui,
|
||||
unsigned char *OUTPUT,
|
||||
LONGUINT64 addr,
|
||||
LONGINT64 len);
|
||||
|
||||
/* enumerate the objects in the .chm archive */
|
||||
typedef int (*CHM_ENUMERATOR)(struct chmFile *h,
|
||||
struct chmUnitInfo *ui,
|
||||
void *context);
|
||||
#define CHM_ENUMERATE_NORMAL (1)
|
||||
#define CHM_ENUMERATE_META (2)
|
||||
#define CHM_ENUMERATE_SPECIAL (4)
|
||||
#define CHM_ENUMERATE_FILES (8)
|
||||
#define CHM_ENUMERATE_DIRS (16)
|
||||
#define CHM_ENUMERATE_ALL (31)
|
||||
#define CHM_ENUMERATOR_FAILURE (0)
|
||||
#define CHM_ENUMERATOR_CONTINUE (1)
|
||||
#define CHM_ENUMERATOR_SUCCESS (2)
|
||||
int chm_enumerate(struct chmFile *h,
|
||||
int what,
|
||||
CHM_ENUMERATOR e,
|
||||
void *context);
|
||||
|
||||
int chm_enumerate_dir(struct chmFile *h,
|
||||
const char *prefix,
|
||||
int what,
|
||||
CHM_ENUMERATOR e,
|
||||
void *context);
|
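A sketch of how the enumeration typemap above is meant to be driven from Python (an illustration, not part of the commit). The dummy_enumerator trampoline calls the registered Python callable with the wrapped chmFile, the chmUnitInfo and the user context, and enumeration continues as long as the callable returns without raising; f is assumed to come from chmlib.chm_open() as in the earlier sketch.

def list_entry(cf, ui, context):
    # ui is the chmUnitInfo wrapper defined in the generated chm.py above
    print ui.path
    return chmlib.CHM_ENUMERATOR_CONTINUE

chmlib.chm_enumerate(f, chmlib.CHM_ENUMERATE_ALL, list_entry, None)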
@ -4,6 +4,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import time, bz2
from calibre.constants import isfreebsd

from calibre.constants import __version__, __appname__, __author__

@ -57,6 +58,9 @@ def create_man_page(prog, parser):
    lines = [x if isinstance(x, unicode) else unicode(x, 'utf-8', 'replace') for
            x in lines]

    return bz2.compress((u'\n'.join(lines)).encode('utf-8'))
    if not isfreebsd:
        return bz2.compress((u'\n'.join(lines)).encode('utf-8'))
    else:
        return (u'\n'.join(lines)).encode('utf-8')

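One consequence of the hunk above, shown as a hedged sketch (the file-name scheme is hypothetical): create_man_page() now returns bz2-compressed bytes everywhere except on FreeBSD, where the troff source comes back uncompressed, so any caller writing the result to disk has to branch the same way.

raw = create_man_page(prog, parser)
manfile = prog + ('.1' if isfreebsd else '.1.bz2')   # hypothetical naming
open(manfile, 'wb').write(raw)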