KG updates

This commit is contained in:
GRiker 2010-03-02 16:26:25 -08:00
commit 7ad7c34eee
54 changed files with 3929 additions and 490 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 766 B

View File

@ -5,76 +5,103 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
theatlantic.com
'''
import re
import string
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
class TheAtlantic(BasicNewsRecipe):
title = 'The Atlantic'
__author__ = 'Kovid Goyal and Sujata Raman'
description = 'Current affairs and politics focussed on the US'
INDEX = 'http://www.theatlantic.com/doc/current'
INDEX = 'http://www.theatlantic.com/magazine/toc/0/'
language = 'en'
remove_tags_before = dict(name='div', id='storytop')
remove_tags = [
dict(name='div', id=['seealso','storybottom', 'footer', 'ad_banner_top', 'sidebar','articletoolstop','subcontent',]),
dict(name='p', attrs={'id':["pagination"]}),
dict(name='table',attrs={'class':"tools"}),
dict(name='style'),
dict(name='a', href='/a/newsletters.mhtml')
]
remove_attributes = ['icap', 'callout', 'style']
no_stylesheets = True
conversion_options = { 'linearize_tables':True }
remove_tags_before = dict(name='div', id='articleHead')
remove_tags_after = dict(id='copyright')
remove_tags = [dict(id=['header', 'printAds', 'pageControls'])]
no_stylesheets = True
def print_version(self, url):
return url.replace('/archive/', '/print/')
extra_css = '''
#timestamp{font-family:Arial,Helvetica,sans-serif; color:#666666 ;font-size:x-small}
#storytype{font-family:Arial,Helvetica,sans-serif; color:#D52B1E ;font-weight:bold; font-size:x-small}
h2{font-family:georgia,serif; font-style:italic;font-size:x-small;font-weight:normal;}
h1{font-family:georgia,serif; font-weight:bold; font-size:large}
#byline{font-family:georgia,serif; font-weight:bold; font-size:x-small}
#topgraf{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
.artsans{{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
'''
def parse_index(self):
articles = []
soup = self.index_to_soup(self.INDEX)
sectit = soup.find('h1', attrs={'class':'sectionTitle'})
if sectit is not None:
texts = sectit.findAll('cufontext')
texts = map(self.tag_to_string, texts[-2:])
self.timefmt = ' [%s]'%(''.join(texts))
issue = soup.find('span', attrs={'class':'issue'})
if issue:
self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')
cover = soup.find('img', alt=re.compile('Cover'), src=True)
cover = soup.find('img', src=True, attrs={'class':'cover'})
if cover is not None:
self.cover_url = 'http://theatlantic.com'+cover['src']
self.cover_url = cover['src']
for item in soup.findAll('div', attrs={'class':'item'}):
a = item.find('a')
if a and a.has_key('href'):
feeds = []
for section in soup.findAll('div', attrs={'class':'magazineSection'}):
section_title = section.find(attrs={'class':'sectionHeader'})
section_title = string.capwords(self.tag_to_string(section_title))
self.log('Found section:', section_title)
articles = []
for post in section.findAll('div', attrs={'class':'post'}):
h = post.find(['h3', 'h4'])
title = self.tag_to_string(h)
a = post.find('a', href=True)
url = a['href']
if not url.startswith('http://'):
url = 'http://www.theatlantic.com/'+url
url = url.replace('/doc/', '/doc/print/')
title = self.tag_to_string(a)
if title in ('VIDEO', 'AUDIO', 'INTERACTIVE MAP', 'SIDEBAR', 'RECIPES'):
continue
title = title.replace('&AMP;', '&')
byline = item.find(attrs={'class':'byline'})
date = self.tag_to_string(byline) if byline else ''
description = ''
if url.startswith('/'):
url = 'http://www.theatlantic.com'+url
p = post.find('p', attrs={'class':'dek'})
desc = None
self.log('\tFound article:', title, 'at', url)
if p is not None:
desc = self.tag_to_string(p)
self.log('\t\t', desc)
articles.append({'title':title, 'url':url, 'description':desc,
'date':''})
feeds.append((section_title, articles))
self.log('\tFound article:', title)
self.log('\t\t', url)
poems = []
self.log('Found section: Poems')
for poem in soup.findAll('div', attrs={'class':'poem'}):
title = self.tag_to_string(poem.find('h4'))
desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
url = 'http://www.theatlantic.com'+poem.find('a')['href']
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)
poems.append({'title':title, 'url':url, 'description':desc,
'date':''})
if poems:
feeds.append(('Poems', poems))
articles.append({
'title':title,
'date':date,
'url':url,
'description':description
})
self.log('Found section: Advice')
div = soup.find(id='advice')
title = self.tag_to_string(div.find('h4'))
url = 'http://www.theatlantic.com'+div.find('a')['href']
desc = self.tag_to_string(div.find('p'))
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)
feeds.append(('Advice', [{'title':title, 'url':url, 'description':desc,
'date':''}]))
return feeds
def postprocess_html(self, soup, first):
for table in soup.findAll('table', align='right'):
img = table.find('img')
if img is not None:
img.extract()
caption = self.tag_to_string(table).strip()
div = Tag(soup, 'div')
div['style'] = 'text-align:center'
div.insert(0, img)
div.insert(1, Tag(soup, 'br'))
if caption:
div.insert(2, NavigableString(caption))
table.replaceWith(div)
return soup
return [('Current Issue', articles)]

View File

@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.diariovasco.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DiarioVasco(BasicNewsRecipe):
    """News recipe for www.diariovasco.com, built from the site's RSS feeds."""

    # Publication metadata.
    title = 'Diario Vasco'
    __author__ = 'Darko Miletic'
    description = 'Noticias de pais Vasco y el resto del mundo'
    publisher = 'Diario Vasco'
    category = 'news, politics, Spain'
    language = 'es'
    masthead_url = 'http://www.diariovasco.com/img/rd.logotipo2_dvasco.gif'

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '

    # The metadata defined above is forwarded to the conversion step.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep the element with id "title" plus the listed article classes.
    keep_only_tags = [
        {'attrs': {'id': 'title'}},
        {'attrs': {'class': ['overhead', 'headline', 'subhead', 'date',
                             'text', 'noticia_cont', 'desarrollo']}},
    ]
    remove_tags = [{'name': 'ul'}]
    remove_attributes = ['width', 'height']

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Ultimas Noticias', u'http://www.diariovasco.com/rss/feeds/ultima.xml'),
        (u'Portada', u'http://www.diariovasco.com/portada.xml'),
        (u'Politica', u'http://www.diariovasco.com/rss/feeds/politica.xml'),
        (u'Deportes', u'http://www.diariovasco.com/rss/feeds/deportes.xml'),
        (u'Economia', u'http://www.diariovasco.com/rss/feeds/economia.xml'),
        (u'Mundo', u'http://www.diariovasco.com/rss/feeds/mundo.xml'),
        (u'Cultura', u'http://www.diariovasco.com/rss/feeds/cultura.xml'),
        (u'Gente', u'http://www.diariovasco.com/rss/feeds/gente.xml'),
        (u'Contraportada', u'http://www.diariovasco.com/rss/feeds/contraportada.xml'),
    ]

View File

@ -119,6 +119,8 @@ class Economist(BasicNewsRecipe):
ns = NavigableString(self.tag_to_string(caption))
div.insert(0, ns)
div.insert(1, Tag(soup, 'br'))
del img['width']
del img['height']
img.extract()
div.insert(2, img)
table.replaceWith(div)

View File

@ -123,6 +123,8 @@ class Economist(BasicNewsRecipe):
div.insert(0, ns)
div.insert(1, Tag(soup, 'br'))
img.extract()
del img['width']
del img['height']
div.insert(2, img)
table.replaceWith(div)
return soup

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.epicurious.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Epicurious(BasicNewsRecipe):
    """News recipe for www.epicurious.com (recipes, features and blog feeds)."""

    # Publication metadata.
    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'

    # Download settings.
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    # Only these <div> containers (matched by class or by id) are kept.
    keep_only_tags = [
        {'name': 'div',
         'attrs': {'class': ['mainconsolewrapper', 'videoheader',
                             'content_unit', 'entry-content',
                             'see_more_block']}},
        {'name': 'div',
         'attrs': {'id': ['headline', 'introBlock', 'ingredients',
                          'preparation', 'articleContent',
                          'in_categories_block']}},
    ]

    # Elements stripped from the kept content.
    remove_tags = [
        {'id': ['printShoppingList', 'addnoteLnk', 'btnUploadVideo',
                'enlarge_image']},
        {'class': ['subLnk', 'sbmWrapper', 'detail_division', 'entry-footer',
                   'comment-footer']},
        {'name': 'div', 'attrs': {'class': ['tagged', 'comments']}},
    ]
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'entry-content'}}]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
        (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
        (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
        (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog'),
    ]

    # URL pattern used together with ``recursions`` above.
    match_regexps = [
        r'http://www.epicurious.com/.*recipes/.*/views',
    ]

    # (compiled pattern, replacement callable) pairs applied to the raw HTML.
    preprocess_regexps = [
        # Drop the newline that follows a "/".
        (re.compile(r'/\n', re.IGNORECASE | re.DOTALL),
         lambda match: '/'),
        # Strip the "_116" suffix from .jpg file names.
        (re.compile(r'_116.jpg', re.IGNORECASE | re.DOTALL),
         lambda match: '.jpg'),
        # Cut everything from the comments <div> up to the closing </body>.
        (re.compile('<div class=\"comments\".*</body>', re.IGNORECASE | re.DOTALL),
         lambda match: '</body>'),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename every <table>, <tr> and <td> element to <div>.

        Returns the same ``soup`` object after the in-place renaming.
        ``first_fetch`` is accepted but not used.
        """
        for element in soup.findAll(['table', 'tr', 'td']):
            element.name = 'div'
        return soup

View File

@ -3,7 +3,7 @@ import re
class HuffingtonPostRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
__author__ = 'kwetal and Archana Raman'
language = 'en'
version = 2
@ -14,70 +14,89 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
oldest_article = 1.1
max_articles_per_feed = 100
use_embedded_content = True
#use_embedded_content = True
encoding = 'utf-8'
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
# Feeds from: http://www.huffingtonpost.com/syndication/
feeds = []
feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))
#feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
#feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
#feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
#feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
#feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
#feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
#feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
#feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
#feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
#feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
#feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
#feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
#feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
#feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
#feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
#feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
#feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
#feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
#feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
#feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
#feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
#feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
#feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
#feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
#feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
#feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
#feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
#feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
#feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
#feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
#feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
remove_tags = []
remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
remove_tags.append(dict(name='a', attrs={'class' : 'home_pixie'}))
remove_tags.append(dict(name='div', attrs={'id' : ["top_nav",'threeup_top_wrapper','breaking_news_container',"hp_social_network"]}))
remove_tags.append(dict(name='img', alt="Connect"))
remove_tags.append(dict(name='div', attrs={'class' : ['logo']})) #'share_boxes_box_block_b_wraper',
remove_tags.append(dict(name='div', attrs={'class' :[ 'read_more with_verticals','chicklets_box_outter_v05','blogger_menu_content','chicklets_bar']}))
remove_tags.append(dict(name='div', attrs={'class' : ['sidebar_blog_first_design','sidebar_blog_second_design',]}))
remove_tags.append(dict(name='div', attrs={'class' : ['main_big_news_ontop','login-menu','sidebar_blog_third_design','read_more']}))
remove_tags_after = [dict(name='div', attrs={'class' : 'entry_content'}) ]
# remove_attributes = ['style']
remove_attributes = ['style']
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
h2{font-size: x-large; font-weight: bold; padding: 0em; margin-bottom: 0.2em;}
a[href]{color: blue; text-decoration: none; cursor: pointer;}
#title_permalink{color:black;font-size:large;}
.date{color:#858585;font-family:"Times New Roman",sans-serif;}
.comments_datetime v05{color:#696969;}
.teaser_permalink{font-style:italic;font-size:xx-small;}
.blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
'''
#a[href]{color: blue; text-decoration: none; cursor: pointer;}
def get_article_url(self, article):
"""
Workaround for Feedparser behaviour. If an item has more than one <link/> element, article.link is empty and
@ -85,10 +104,21 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
Todo: refactor to searching this list to avoid the hardcoded zero-index
"""
link = article.get('link')
print("Link:"+link)
if not link:
links = article.get('links')
if links:
link = links[0]['href']
if not links[0]['href']:
link = links[1]['href']
return link
def postprocess_html(self, soup, first_fetch):
for tag in soup.findAll('div',text = "What's Your Reaction?"):
tag.extract()
for tg in soup.findAll('blockquote'):
tg.extract()
return soup

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
Kukuburi.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class KukuburiRecipe(BasicNewsRecipe):
    """News recipe for the Kukuburi web comic, read from its FeedBurner feed."""

    __author__ = 'Mori'
    language = 'en'
    title = u'Kukuburi'

    # Non-ASCII characters are written as Unicode code-point escapes
    # (\xf3 = o-acute, \xe9 = e-acute).  The previous description embedded the
    # UTF-8 *byte* values (\xc3\xb3, \xc3\xa9) inside a unicode literal, which
    # yields mojibake, and the publisher relied on the source file encoding.
    publisher = u'Ram\xf3n P\xe9rez'
    description = u'KUKUBURI by Ram\xf3n P\xe9rez'

    no_stylesheets = True
    remove_javascript = True

    oldest_article = 100
    max_articles_per_feed = 100

    # (section title, feed URL) pairs.
    feeds = [
        (u'Kukuburi', u'http://feeds2.feedburner.com/Kukuburi'),
    ]

    # (compiled pattern, replacement callable) pairs: remove HTML comments and
    # the "feedflare" <div> blocks from the downloaded markup.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            (r'<!--.*?-->', lambda match: ''),
            (r'<div class="feedflare".*?</div>', lambda match: ''),
        ]
    ]

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lasegunda.com
'''
@ -19,43 +17,38 @@ class LaSegunda(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_javascript = True
language = 'es'
masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_empty_feeds = True
language = 'es'
extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
keep_only_tags = [dict(name='table')]
remove_tags_before = dict(attrs={'class':'titulonegritastop'})
remove_tags = [dict(name='img')]
remove_attributes = ['width','height']
feeds = [
(u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
,(u'Politica', u'http://www.lasegunda.com/rss20/index.asp?canal=21')
,(u'Cronica', u'http://www.lasegunda.com/rss20/index.asp?canal=20')
,(u'Internacional', u'http://www.lasegunda.com/rss20/index.asp?canal=23')
,(u'Deportes', u'http://www.lasegunda.com/rss20/index.asp?canal=24')
,(u'Epectaculos/Cultura', u'http://www.lasegunda.com/rss20/index.asp?canal=25')
,(u'Educacion', u'http://www.lasegunda.com/rss20/index.asp?canal=26')
,(u'Ciencia y Tecnologia', u'http://www.lasegunda.com/rss20/index.asp?canal=27')
,(u'Solidaridad', u'http://www.lasegunda.com/rss20/index.asp?canal=28')
,(u'Buena Vida', u'http://www.lasegunda.com/rss20/index.asp?canal=32')
,(u'Politica' , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
,(u'Cronica' , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
,(u'Internacional' , u'http://www.lasegunda.com/rss20/index.asp?canal=23')
,(u'Deportes' , u'http://www.lasegunda.com/rss20/index.asp?canal=24')
,(u'Epectaculos/Cultura' , u'http://www.lasegunda.com/rss20/index.asp?canal=25')
,(u'Educacion' , u'http://www.lasegunda.com/rss20/index.asp?canal=26')
,(u'Ciencia y Tecnologia' , u'http://www.lasegunda.com/rss20/index.asp?canal=27')
,(u'Solidaridad' , u'http://www.lasegunda.com/rss20/index.asp?canal=28')
,(u'Buena Vida' , u'http://www.lasegunda.com/rss20/index.asp?canal=32')
]
def print_version(self, url):
rest, sep, article_id = url.partition('index.asp?idnoticia=')
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
latercera.com
'''
@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_empty_feeds = True
language = 'es'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
remove_tags = [
dict(name='script')
,dict(name='ul')
dict(name=['ul','input','base'])
,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
,dict(name='input')
,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
]
feeds = [
(u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
,(u'Pais', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=654')
,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = 'es'

View File

@ -11,7 +11,8 @@ import sys, re, os, platform
is64bit = platform.architecture()[0] == '64bit'
iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform
islinux = not isosx and not iswindows
isfreebsd = 'freebsd' in sys.platform
islinux = not isosx and not iswindows and not isfreebsd
SRC = os.path.abspath('src')
sys.path.insert(0, SRC)
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
@ -117,7 +118,7 @@ class Command(object):
self.real_user = os.environ.get('SUDO_USER', None)
def drop_privileges(self):
if not islinux or isosx:
if not islinux or isosx or isfreebsd:
return
if self.real_user is not None:
self.info('Dropping privileges to those of', self.real_user+':',
@ -128,7 +129,7 @@ class Command(object):
os.seteuid(int(self.real_uid))
def regain_privileges(self):
if not islinux or isosx:
if not islinux or isosx or isfreebsd:
return
if os.geteuid() != 0 and self.orig_euid == 0:
self.info('Trying to get root privileges')

View File

@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
fc_lib = '/usr/lib'
podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib'
chmlib_inc_dirs = chmlib_lib_dirs = []
if iswindows:
prefix = r'C:\cygwin\home\kovid\sw'
@ -96,6 +97,10 @@ if iswindows:
sw_lib_dir = os.path.join(prefix, 'lib')
fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
fc_lib = sw_lib_dir
chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
'build', 'chmlib-0.40', 'src'))
chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
'build', 'chmlib-0.40', 'src', 'Release'))
png_inc_dirs = [sw_inc_dir]
png_lib_dirs = [sw_lib_dir]
png_libs = ['png12']

View File

@ -11,15 +11,16 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile
from setup import Command, islinux, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, \
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
jpg_lib_dirs, chmlib_lib_dirs
MT
isunix = islinux or isosx
isunix = islinux or isosx or isfreebsd
make = 'make' if isunix else NMAKE
@ -56,6 +57,22 @@ if iswindows:
pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
extensions = [
Extension('chmlib',
['calibre/utils/chm/swig_chm.c'],
libraries=['ChmLib' if iswindows else 'chm'],
inc_dirs=chmlib_inc_dirs,
lib_dirs=chmlib_lib_dirs,
cflags=["-DSWIG_COBJECT_TYPES"]),
Extension('chm_extra',
['calibre/utils/chm/extra.c'],
libraries=['ChmLib' if iswindows else 'chm'],
inc_dirs=chmlib_inc_dirs,
lib_dirs=chmlib_lib_dirs,
cflags=["-D__PYTHON__"]),
Extension('pdfreflow',
reflow_sources,
headers=reflow_headers,
@ -154,6 +171,13 @@ if islinux:
ldflags.append('-lpython'+sysconfig.get_python_version())
if isfreebsd:
cflags.append('-pthread')
ldflags.append('-shared')
cflags.append('-I'+sysconfig.get_python_inc())
ldflags.append('-lpython'+sysconfig.get_python_version())
if isosx:
x, p = ('i386', 'ppc')
archs = ['-arch', x, '-arch', p, '-isysroot',

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex
from setup import Command, islinux, basenames, modules, functions, \
from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
__appname__, __version__
HEADER = '''\
@ -116,7 +116,7 @@ class Develop(Command):
def pre_sub_commands(self, opts):
if not islinux:
if not (islinux or isfreebsd):
self.info('\nSetting up a source based development environment is only '
'supported on linux. On other platforms, see the User Manual'
' for help with setting up a development environment.')
@ -156,7 +156,7 @@ class Develop(Command):
self.warn('Failed to compile mount helper. Auto mounting of',
' devices will not work')
if os.geteuid() != 0:
if not isfreebsd and os.geteuid() != 0:
return self.warn('Must be run as root to compile mount helper. Auto '
'mounting of devices will not work.')
src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
@ -168,9 +168,10 @@ class Develop(Command):
ret = p.wait()
if ret != 0:
return warn()
os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
if not isfreebsd:
os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
self.manifest.append(dest)
return dest

View File

@ -42,6 +42,7 @@ class LinuxFreeze(Command):
'/usr/lib/liblcms.so.1',
'/tmp/calibre-mount-helper',
'/usr/lib/libunrar.so',
'/usr/lib/libchm.so.0',
'/usr/lib/libsqlite3.so.0',
'/usr/lib/libsqlite3.so.0',
'/usr/lib/libmng.so.1',

View File

@ -459,7 +459,7 @@ class Py2App(object):
@flush
def add_misc_libraries(self):
for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
info('\nAdding', x)
x = 'lib%s.dylib'%x
shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)

View File

@ -12,7 +12,7 @@ warnings.simplefilter('ignore', DeprecationWarning)
from calibre.startup import plugins, winutil, winutilerror
from calibre.constants import iswindows, isosx, islinux, isfrozen, \
from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
terminal_controller, preferred_encoding, \
__appname__, __version__, __author__, \
win32event, win32api, winerror, fcntl, \
@ -22,7 +22,7 @@ import mechanize
if False:
winutil, winutilerror, __appname__, islinux, __version__
fcntl, win32event, isfrozen, __author__, terminal_controller
winerror, win32api
winerror, win32api, isfreebsd
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')

View File

@ -22,7 +22,8 @@ terminal_controller = TerminalController(sys.stdout)
iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
isosx = 'darwin' in sys.platform.lower()
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
islinux = not(iswindows or isosx)
isfreebsd = 'freebsd' in sys.platform.lower()
islinux = not(iswindows or isosx or isfreebsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux
@ -56,7 +57,8 @@ if plugins is None:
sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
'fontconfig', 'pdfreflow', 'progress_indicator'] + \
'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
'chm_extra'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:

View File

@ -119,11 +119,34 @@ class Plugin(object):
def __enter__(self, *args):
if self.plugin_path is not None:
sys.path.insert(0, self.plugin_path)
from calibre.utils.zipfile import ZipFile
zf = ZipFile(self.plugin_path)
extensions = set([x.rpartition('.')[-1].lower() for x in
zf.namelist()])
zip_safe = True
for ext in ('pyd', 'so', 'dll', 'dylib'):
if ext in extensions:
zip_safe = False
if zip_safe:
sys.path.insert(0, self.plugin_path)
self._sys_insertion_path = self.plugin_path
else:
from calibre.ptempfile import TemporaryDirectory
self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
self._sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
zf.extractall(self._sys_insertion_path)
sys.path.insert(0, self._sys_insertion_path)
zf.close()
def __exit__(self, *args):
if self.plugin_path in sys.path:
sys.path.remove(self.plugin_path)
ip, it = getattr(self, '_sys_insertion_path', None), getattr(self,
'_sys_insertion_tdir', None)
if ip in sys.path:
sys.path.remove(ip)
if hasattr(it, '__exit__'):
it.__exit__(*args)
class FileTypePlugin(Plugin):

View File

@ -103,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
mi.cover_data = (ext.lower(), data)
return mi
class CHMMetadataReader(MetadataReaderPlugin):

    # Plugin registration data: this reader handles files of type 'chm'.
    file_types  = set(['chm'])
    name        = 'Read CHM metadata'
    description = _('Read metadata from %s files') % 'CHM'

    def get_metadata(self, stream, ftype):
        '''
        Return whatever calibre.ebooks.metadata.chm.get_metadata() produces
        for ``stream``. ``ftype`` is accepted but not used.
        '''
        # Imported inside the method, not at module level, exactly as before.
        from calibre.ebooks.metadata import chm
        return chm.get_metadata(stream)
class EPUBMetadataReader(MetadataReaderPlugin):
name = 'Read EPUB metadata'
@ -384,6 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output
@ -444,6 +456,7 @@ plugins += [
TCRInput,
TXTInput,
LRFInput,
CHMInput,
]
plugins += [
EPUBOutput,

View File

@ -401,7 +401,7 @@ def initialize_plugins():
plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
except PluginNotFound:
continue
plugin = initialize_plugin(plugin, zfp if not isinstance(zfp, type) else zfp)
plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
_initialized_plugins.append(plugin)
except:
print 'Failed to initialize plugin...'

View File

@ -23,6 +23,8 @@ Run an embedded python interpreter.
help='Debug the specified device driver.')
parser.add_option('-g', '--gui', default=False, action='store_true',
help='Run the GUI',)
parser.add_option('-w', '--viewer', default=False, action='store_true',
help='Run the ebook viewer',)
parser.add_option('--paths', default=False, action='store_true',
help='Output the paths necessary to setup the calibre environment')
parser.add_option('--migrate', action='store_true', default=False,
@ -98,6 +100,12 @@ def main(args=sys.argv):
if opts.gui:
from calibre.gui2.main import main
main(['calibre'])
elif opts.viewer:
from calibre.gui2.viewer.main import main
vargs = ['ebook-viewer', '--debug-javascript']
if len(args) > 1:
vargs.append(args[-1])
main(vargs)
elif opts.command:
sys.argv = args[:1]
exec opts.command

View File

@ -15,7 +15,7 @@ class ANDROID(USBMS):
supported_platforms = ['windows', 'osx', 'linux']
# Ordered list of supported formats
FORMATS = ['epub']
FORMATS = ['epub', 'pdf']
VENDOR_ID = {
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},

View File

@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
from errno import EBUSY, ENOMEM
from calibre import iswindows, isosx, load_library
from calibre import iswindows, isosx, isfreebsd, load_library
_libusb_name = 'libusb'
PATH_MAX = 511 if iswindows else 1024 if isosx else 4096
PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
if iswindows:
class Structure(_Structure):
_pack_ = 1

View File

@ -398,16 +398,6 @@ class Device(DeviceConfig, DevicePlugin):
if len(matches) > 2:
drives['cardb'] = matches[2]
pat = self.OSX_MAIN_MEM_VOL_PAT
if pat is not None and len(drives) > 1 and 'main' in drives:
if pat.search(drives['main']) is None:
main = drives['main']
for x in ('carda', 'cardb'):
if x in drives and pat.search(drives[x]):
drives['main'] = drives.pop(x)
drives[x] = main
break
return drives
def osx_bsd_names(self):
@ -431,6 +421,16 @@ class Device(DeviceConfig, DevicePlugin):
if drives['main'] is None:
print bsd_drives, mount_map, drives
raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
pat = self.OSX_MAIN_MEM_VOL_PAT
if pat is not None and len(drives) > 1 and 'main' in drives:
if pat.search(drives['main']) is None:
main = drives['main']
for x in ('carda', 'cardb'):
if x in drives and pat.search(drives[x]):
drives['main'] = drives.pop(x)
drives[x] = main
break
self._main_prefix = drives['main']+os.sep
def get_card_prefix(c):
ans = drives.get(c, None)

View File

@ -1,213 +1,17 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, shutil, uuid, re
from tempfile import mkdtemp
from mimetypes import guess_type as guess_mimetype
import os, uuid
from BeautifulSoup import BeautifulSoup, NavigableString
from lxml import html
from pychm.chm import CHMFile
from pychm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate,
)
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
def match_string(s1, s2_already_lowered):
if s1 is not None and s2_already_lowered is not None:
if s1.lower()==s2_already_lowered:
return True
return False
def check_all_prev_empty(tag):
if tag is None:
return True
if tag.__class__ == NavigableString and not check_empty(tag):
return False
return check_all_prev_empty(tag.previousSibling)
def check_empty(s, rex = re.compile(r'\S')):
return rex.search(s) is None
def option_parser():
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help=_("Set the book title"))
parser.add_option('--title-sort', action='store', type='string', default=None,
dest='title_sort', help=_('Set sort key for the title'))
parser.add_option("-a", "--author", action="store", type="string", \
dest="author", help=_("Set the author"))
parser.add_option('--author-sort', action='store', type='string', default=None,
dest='author_sort', help=_('Set sort key for the author'))
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help=_("The category this book belongs"
" to. E.g.: History"))
parser.add_option("--thumbnail", action="store", type="string", \
dest="thumbnail", help=_("Path to a graphic that will be"
" set as this files' thumbnail"))
parser.add_option("--comment", action="store", type="string", \
dest="freetext", help=_("Path to a txt file containing a comment."))
parser.add_option("--get-thumbnail", action="store_true", \
dest="get_thumbnail", default=False, \
help=_("Extract thumbnail from LRF file"))
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
parser.add_option('--classification', default=None, help=_('Set the book classification'))
parser.add_option('--creator', default=None, help=_('Set the book creator'))
parser.add_option('--producer', default=None, help=_('Set the book producer'))
parser.add_option('--get-cover', action='store_true', default=False,
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
parser.add_option('--bookid', action='store', type='string', default=None,
dest='book_id', help=_('Set book ID'))
parser.add_option('--font-delta', action='store', type='int', default=0,
dest='font_delta', help=_('Set font delta'))
return parser
class CHMError(Exception):
pass
class CHMReader(CHMFile):
def __init__(self, input, log):
CHMFile.__init__(self)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self._sourcechm = input
self._contents = None
self._playorder = 0
self._metadata = False
self._extracted = False
# location of '.hhc' file, which is the CHM TOC.
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc"
def _parse_toc(self, ul, basedir=os.getcwdu()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1
for li in ul('li', recursive=False):
href = li.object('param', {'name': 'Local'})[0]['value']
if href.count('#'):
href, frag = href.split('#')
else:
frag = None
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
#print "========>", name
toc.add_item(href, frag, name, play_order=self._playorder)
self._playorder += 1
if li.ul:
child = self._parse_toc(li.ul)
child.parent = toc
toc.append(child)
#print toc
return toc
def GetFile(self, path):
# have to have abs paths for ResolveObject, but Contents() deliberately
# makes them relative. So we don't have to worry, re-add the leading /.
# note this path refers to the internal CHM structure
if path[0] != '/':
path = '/' + path
res, ui = self.ResolveObject(path)
if res != CHM_RESOLVE_SUCCESS:
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
size, data = self.RetrieveObject(ui)
if size == 0:
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data
def ExtractFiles(self, output_dir=os.getcwdu()):
for path in self.Contents():
lpath = os.path.join(output_dir, path)
self._ensure_dir(lpath)
data = self.GetFile(path)
with open(lpath, 'wb') as f:
if guess_mimetype(path)[0] == ('text/html'):
data = self._reformat(data)
f.write(data)
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
self._extracted = True
def _reformat(self, data):
try:
soup = BeautifulSoup(data)
except UnicodeEncodeError:
# hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :("
return data
# nuke javascript...
[s.extract() for s in soup('script')]
# remove forward and back nav bars from the top/bottom of each page
# cos they really fuck with the flow of things and generally waste space
# since we can't use [a,b] syntax to select arbitrary items from a list
# we'll have to do this manually...
t = soup('table')
if t:
if (t[0].previousSibling is None
or t[0].previousSibling.previousSibling is None):
t[0].extract()
if (t[-1].nextSibling is None
or t[-1].nextSibling.nextSibling is None):
t[-1].extract()
# for some very odd reason each page's content appears to be in a table
# too. and this table has sub-tables for random asides... grr.
# remove br at top of page if present after nav bars removed
br = soup('br')
if br:
if check_all_prev_empty(br[0].previousSibling):
br[0].extract()
# some images seem to be broken in some chm's :/
for img in soup('img'):
try:
# some are supposedly "relative"... lies.
while img['src'].startswith('../'): img['src'] = img['src'][3:]
# some have ";<junk>" at the end.
img['src'] = img['src'].split(';')[0]
except KeyError:
# and some don't even have a src= ?!
pass
# now give back some pretty html.
return soup.prettify()
def Contents(self):
if self._contents is not None:
return self._contents
paths = []
def get_paths(chm, ui, ctx):
# skip directories
# note this path refers to the internal CHM structure
if ui.path[-1] != '/':
# and make paths relative
paths.append(ui.path.lstrip('/'))
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
self._contents = paths
return self._contents
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)
class CHMInput(InputFormatPlugin):
name = 'CHM Input'
@ -215,12 +19,8 @@ class CHMInput(InputFormatPlugin):
description = 'Convert CHM files to OEB'
file_types = set(['chm'])
options = set([
OptionRecommendation(name='dummy_option', recommended_value=False,
help=_('dummy option until real options are determined.')),
])
def _chmtohtml(self, output_dir, chm_path, no_images, log):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log)
log.debug('Extracting CHM to %s' % output_dir)
@ -230,37 +30,36 @@ class CHMInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.metadata.chm import get_metadata_
from calibre.customize.ui import plugin_for_input_format
log.debug('Processing CHM...')
tdir = mkdtemp(prefix='chm2oeb_')
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
no_images = False #options.no_images
chm_name = stream.name
#chm_data = stream.read()
with TemporaryDirectory('chm2oeb') as tdir:
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
no_images = False #options.no_images
chm_name = stream.name
#chm_data = stream.read()
#closing stream so CHM can be opened by external library
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname)
#closing stream so CHM can be opened by external library
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname)
metadata = get_metadata_(tdir)
metadata = get_metadata_(tdir)
odi = options.debug_pipeline
options.debug_pipeline = None
# try a custom conversion:
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
# try using html converter:
htmlpath = self._create_html_root(mainpath, log)
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
options.debug_pipeline = odi
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
shutil.rmtree(tdir)
odi = options.debug_pipeline
options.debug_pipeline = None
# try a custom conversion:
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
# try using html converter:
htmlpath = self._create_html_root(mainpath, log)
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
options.debug_pipeline = odi
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
return oeb
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
@ -369,6 +168,8 @@ class CHMInput(InputFormatPlugin):
# check that node is a normal node (not a comment, DOCTYPE, etc.)
# (normal nodes have string tags)
if isinstance(node.tag, basestring):
from calibre.ebooks.chm.reader import match_string
if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
for child in node:
if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):

View File

@ -0,0 +1,207 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, re
from mimetypes import guess_type as guess_mimetype
from BeautifulSoup import BeautifulSoup, NavigableString
from calibre.utils.chm.chm import CHMFile
from calibre.utils.chm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate,
)
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC
def match_string(s1, s2_already_lowered):
    '''
    Case-insensitively compare ``s1`` with ``s2_already_lowered``.

    Only ``s1`` is lower-cased; the second argument is compared exactly as
    given, so the caller has to supply it in lower case already. Returns
    False if either argument is None.
    '''
    if s1 is None or s2_already_lowered is None:
        return False
    return s1.lower() == s2_already_lowered
def check_all_prev_empty(tag):
    '''
    Return True if no non-blank text is found at ``tag`` or before it.

    Starting at ``tag``, the ``previousSibling`` links are followed until
    None is reached. Only nodes whose class is exactly NavigableString are
    examined (with check_empty()); every other node is stepped over. The
    first non-blank string found makes the result False. Passing None
    returns True.
    '''
    # Walk the chain with a loop instead of recursing: one stack frame per
    # sibling can exceed the interpreter's recursion limit on long runs.
    node = tag
    while node is not None:
        if node.__class__ == NavigableString and not check_empty(node):
            return False
        node = node.previousSibling
    return True
def check_empty(s, rex = re.compile(r'\S')):
    '''
    Return True when ``s`` contains no non-whitespace character.

    ``rex`` is the pattern searched for; as a default argument it is compiled
    once, when the function is defined.
    '''
    first_visible = rex.search(s)
    return first_visible is None
def option_parser():
    '''
    Build and return the command line OptionParser for ``mybook.chm``.

    The options are registered in a fixed order: output directory and
    verbosity first, then the metadata related switches.
    '''
    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
    add = parser.add_option

    # Output location and verbosity
    add('--output-dir', '-d', default='.', dest='output',
        help=_('Output directory. Defaults to current directory'))
    add('--verbose', default=False, action='store_true', dest='verbose')

    # Title and author, each with an optional sort key
    add("-t", "--title", action="store", type="string", dest="title",
        help=_("Set the book title"))
    add('--title-sort', action='store', type='string', default=None,
        dest='title_sort', help=_('Set sort key for the title'))
    add("-a", "--author", action="store", type="string", dest="author",
        help=_("Set the author"))
    add('--author-sort', action='store', type='string', default=None,
        dest='author_sort', help=_('Set sort key for the author'))

    # NOTE(review): several help texts below mention LRF although the usage
    # line is for CHM files -- confirm whether that wording is intended.
    add("-c", "--category", action="store", type="string", dest="category",
        help=_("The category this book belongs"
               " to. E.g.: History"))
    add("--thumbnail", action="store", type="string", dest="thumbnail",
        help=_("Path to a graphic that will be"
               " set as this files' thumbnail"))
    add("--comment", action="store", type="string", dest="freetext",
        help=_("Path to a txt file containing a comment."))
    add("--get-thumbnail", action="store_true", dest="get_thumbnail",
        default=False, help=_("Extract thumbnail from LRF file"))

    # Options that take no explicit dest/type
    add('--publisher', default=None, help=_('Set the publisher'))
    add('--classification', default=None,
        help=_('Set the book classification'))
    add('--creator', default=None, help=_('Set the book creator'))
    add('--producer', default=None, help=_('Set the book producer'))
    add('--get-cover', action='store_true', default=False,
        help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))

    add('--bookid', action='store', type='string', default=None,
        dest='book_id', help=_('Set book ID'))
    add('--font-delta', action='store', type='int', default=0,
        dest='font_delta', help=_('Set font delta'))
    return parser
class CHMError(Exception):
    ''' Raised when a CHM file, or an object inside it, cannot be read. '''
class CHMReader(CHMFile):
    '''
    CHMFile subclass that lists, fetches and extracts the files of a CHM.

    Contents() enumerates the files stored in the archive, GetFile() returns
    the data of one of them and ExtractFiles() writes all of them below an
    output directory, passing HTML pages through _reformat() on the way.
    '''

    def __init__(self, input, log):
        '''
        Open the CHM file ``input``.

        :param input: path of the CHM file, handed to ``LoadCHM``.
        :param log: log object, stored as ``self.log``.
        :raises CHMError: if ``LoadCHM`` reports failure.
        '''
        CHMFile.__init__(self)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
        self.log = log
        self._sourcechm = input
        self._contents = None
        self._playorder = 0
        self._metadata = False
        self._extracted = False

        # location of '.hhc' file, which is the CHM TOC.
        self.root = os.path.splitext(self.topics.lstrip('/'))[0]
        self.hhc_path = self.root + ".hhc"

    def _parse_toc(self, ul, basedir=None):
        '''
        Build a TOC from the ``<li>`` children of the list node ``ul``.

        :param ul: parsed list node; it is called to find its direct ``li``
            children and each ``li`` is expected to hold an ``object`` with
            ``param`` tags named ``Local`` and ``Name``.
        :param basedir: base path given to every TOC created here, nested
            ones included. Defaults to the working directory at call time.
        :return: the TOC for this list, with nested lists appended to it.
        '''
        if basedir is None:
            # Resolved per call; a default of os.getcwdu() in the signature
            # would freeze the directory the process had at import time.
            basedir = os.getcwdu()
        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
        self._playorder += 1
        for li in ul('li', recursive=False):
            target = li.object('param', {'name': 'Local'})[0]['value']
            # Split at the first '#' only, so a fragment that itself holds a
            # '#' no longer raises on unpacking.
            href, sep, frag = target.partition('#')
            if not sep:
                frag = None
            # NOTE(review): _deentity is not defined in this class; presumably
            # it is inherited from CHMFile -- confirm.
            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
            toc.add_item(href, frag, name, play_order=self._playorder)
            self._playorder += 1
            if li.ul:
                # Hand basedir down so nested entries share the parent's base.
                child = self._parse_toc(li.ul, basedir)
                child.parent = toc
                toc.append(child)
        return toc

    def GetFile(self, path):
        '''
        Return the data stored in the CHM under ``path``.

        :param path: path inside the CHM structure, with or without the
            leading '/'.
        :raises CHMError: if the object cannot be resolved or has size 0.
        '''
        # have to have abs paths for ResolveObject, but Contents() deliberately
        # makes them relative. So we don't have to worry, re-add the leading /.
        # note this path refers to the internal CHM structure
        if path[0] != '/':
            path = '/' + path
        res, ui = self.ResolveObject(path)
        if res != CHM_RESOLVE_SUCCESS:
            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
        size, data = self.RetrieveObject(ui)
        if size == 0:
            raise CHMError("'%s' is zero bytes in length!"%(path,))
        return data

    def ExtractFiles(self, output_dir=None):
        '''
        Write every file listed by Contents() below ``output_dir``.

        Files whose name is guessed to be ``text/html`` are cleaned with
        _reformat() before being written. Entries whose path would end up
        outside ``output_dir`` are skipped with a warning.

        :param output_dir: destination directory; defaults to the working
            directory at call time.
        :raises CHMError: propagated from GetFile().
        '''
        if output_dir is None:
            output_dir = os.getcwdu()
        # Trailing separator so that '/out' does not match '/outside'.
        root = os.path.join(os.path.abspath(output_dir), '')
        for path in self.Contents():
            lpath = os.path.join(output_dir, path)
            # The member names come from the CHM file itself; refuse names
            # (e.g. containing '..') that resolve outside the output directory.
            if not os.path.abspath(lpath).startswith(root):
                self.log.warn('Skipping %r: it would be written outside %s'
                        % (path, output_dir))
                continue
            self._ensure_dir(lpath)
            data = self.GetFile(path)
            # Clean up before opening the target, so a failure in the
            # clean-up does not leave an empty file behind.
            if guess_mimetype(path)[0] == 'text/html':
                data = self._reformat(data)
            with open(lpath, 'wb') as f:
                f.write(data)
        self._extracted = True

    def _reformat(self, data):
        '''
        Clean up one HTML page and return it prettified.

        Removes ``script`` tags, a navigation table at the very top and/or
        bottom of the page, a leading ``br`` and repairs ``img`` sources.
        If BeautifulSoup raises UnicodeEncodeError, ``data`` is returned
        unchanged.
        '''
        try:
            soup = BeautifulSoup(data)
        except UnicodeEncodeError:
            # hit some strange encoding problems...
            self.log.warn("Unable to parse html for cleaning, leaving it :(")
            return data
        # nuke javascript...
        for script in soup('script'):
            script.extract()
        # remove forward and back nav bars from the top/bottom of each page
        # because they break the flow of the text and generally waste space.
        # A table counts as a nav bar when at most one node precedes the
        # first table, or at most one node follows the last table.
        t = soup('table')
        if t:
            if (t[0].previousSibling is None
                or t[0].previousSibling.previousSibling is None):
                t[0].extract()
            if (t[-1].nextSibling is None
                or t[-1].nextSibling.nextSibling is None):
                t[-1].extract()
        # for some very odd reason each page's content appears to be in a table
        # too. and this table has sub-tables for random asides... grr.

        # remove br at top of page if present after nav bars removed
        br = soup('br')
        if br:
            if check_all_prev_empty(br[0].previousSibling):
                br[0].extract()

        # some images seem to be broken in some chm's :/
        for img in soup('img'):
            try:
                src = img['src']
            except KeyError:
                # and some don't even have a src= ?!
                continue
            # some are supposedly "relative"... lies.
            while src.startswith('../'):
                src = src[3:]
            # some have ";<junk>" at the end.
            img['src'] = src.split(';')[0]
        # now give back some pretty html.
        return soup.prettify()

    def Contents(self):
        '''
        Return the list of file paths stored in the CHM, without a leading
        '/'. Directories are left out. The list is built on the first call
        and cached in ``self._contents``.
        '''
        if self._contents is not None:
            return self._contents
        paths = []
        def get_paths(chm, ui, ctx):
            # skip directories
            # note this path refers to the internal CHM structure
            if ui.path[-1] != '/':
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

    def _ensure_dir(self, path):
        ''' Create the directory that is to contain ``path`` if missing. '''
        parent = os.path.dirname(path)
        # An empty dirname means the current directory: nothing to create.
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

    def extract_content(self, output_dir=None):
        '''
        Extract all files to ``output_dir``; see ExtractFiles() for details
        and for the meaning of the default.
        '''
        self.ExtractFiles(output_dir=output_dir)

View File

@ -731,7 +731,8 @@ OptionRecommendation(name='timestamp',
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
'periodical.downloaded_recipe'), 'w')
zf.add_dir(out_dir)
self.input_plugin.save_download(zf)
with self.input_plugin:
self.input_plugin.save_download(zf)
zf.close()
self.log.info('Input debug saved to:', out_dir)
@ -780,28 +781,29 @@ OptionRecommendation(name='timestamp',
self.ui_reporter(0.01, _('Converting input to HTML...'))
ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
self.input_plugin.report_progress = ir
self.oeb = self.input_plugin(stream, self.opts,
self.input_fmt, self.log,
accelerators, tdir)
if self.opts.debug_pipeline is not None:
self.dump_input(self.oeb, tdir)
if self.abort_after_input_dump:
return
if self.input_fmt in ('recipe', 'downloaded_recipe'):
self.opts_to_mi(self.user_metadata)
if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts,
self.input_plugin)
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
self.opts.is_image_collection = self.input_plugin.is_image_collection
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
self.flush()
if self.opts.debug_pipeline is not None:
out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
self.dump_oeb(self.oeb, out_dir)
self.log('Parsed HTML written to:', out_dir)
self.input_plugin.specialize(self.oeb, self.opts, self.log,
self.output_fmt)
with self.input_plugin:
self.oeb = self.input_plugin(stream, self.opts,
self.input_fmt, self.log,
accelerators, tdir)
if self.opts.debug_pipeline is not None:
self.dump_input(self.oeb, tdir)
if self.abort_after_input_dump:
return
if self.input_fmt in ('recipe', 'downloaded_recipe'):
self.opts_to_mi(self.user_metadata)
if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts,
self.input_plugin)
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
self.opts.is_image_collection = self.input_plugin.is_image_collection
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
self.flush()
if self.opts.debug_pipeline is not None:
out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
self.dump_oeb(self.oeb, out_dir)
self.log('Parsed HTML written to:', out_dir)
self.input_plugin.specialize(self.oeb, self.opts, self.log,
self.output_fmt)
pr(0., _('Running transforms on ebook...'))
@ -891,7 +893,8 @@ OptionRecommendation(name='timestamp',
our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
self.output_plugin.report_progress = our
our(0., _('Creating')+' %s'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
with self.output_plugin:
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
self.ui_reporter(1.)
run_plugins_on_postprocess(self.output, self.output_fmt)

View File

@ -157,11 +157,9 @@ class EPUBOutput(OutputFormatPlugin):
self.workaround_ade_quirks()
self.workaround_webkit_quirks()
self.workaround_sony_quirks()
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
RescaleImages()(oeb, opts)
from calibre.ebooks.oeb.transforms.split import Split
split = Split(not self.opts.dont_split_on_page_breaks,
max_flow_size=self.opts.flow_size*1024
@ -170,6 +168,8 @@ class EPUBOutput(OutputFormatPlugin):
self.insert_cover()
self.workaround_sony_quirks()
with TemporaryDirectory('_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
oeb_output = plugin_for_output_format('oeb')

View File

@ -20,7 +20,7 @@ from itertools import izip
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux
from calibre.constants import islinux, isfreebsd
from calibre import unicode_path
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
@ -346,7 +346,7 @@ class HTMLInput(InputFormatPlugin):
self.added_resources = {}
self.log = log
for path, href in htmlfile_map.items():
if not islinux:
if not (islinux or isfreebsd):
path = path.lower()
self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
@ -417,7 +417,7 @@ class HTMLInput(InputFormatPlugin):
if os.path.isdir(link):
self.log.warn(link_, 'is a link to a directory. Ignoring.')
return link_
if not islinux:
if not (islinux or isfreebsd):
link = link.lower()
if link not in self.added_resources:
bhref = os.path.basename(link)

View File

@ -215,6 +215,28 @@ def merge_results(one, two):
else:
one[idx].smart_update(x)
class MetadataSources(object):
    '''
    Drive a group of metadata sources as if they were a single one.

    Used as a context manager it enters every source on the way in and exits
    every source on the way out. Calling the object calls each source with
    the same arguments and join() joins each source in turn.
    '''

    def __init__(self, sources):
        # sources: iterable of objects providing __enter__, __exit__,
        # __call__ and join
        self.sources = sources

    def __enter__(self):
        '''
        Enter every source and return ``self``.

        If a source fails to enter, the sources entered so far are exited
        again before the exception is re-raised.
        '''
        import sys
        entered = []
        try:
            for s in self.sources:
                s.__enter__()
                entered.append(s)
        except:
            # Deliberately bare: whatever interrupted us, undo the partial
            # setup and let the exception continue.
            self._exit_all(entered, sys.exc_info())
            raise
        return self

    def __exit__(self, *args):
        '''
        Exit every source, forwarding the exception details received from
        the ``with`` statement. Nothing is returned, so an exception raised
        in the ``with`` body is never suppressed.
        '''
        self._exit_all(list(self.sources), args)

    def _exit_all(self, sources, exc_info):
        # Exit sources[0], then the rest; the finally clause guarantees the
        # remaining sources are exited even when one __exit__ raises.
        if not sources:
            return
        try:
            sources[0].__exit__(*exc_info)
        finally:
            self._exit_all(sources[1:], exc_info)

    def __call__(self, *args, **kwargs):
        ''' Call every source with the given arguments. '''
        for s in self.sources:
            s(*args, **kwargs)

    def join(self):
        ''' Call join() on every source, in order. '''
        for s in self.sources:
            s.join()
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
verbose=0):
assert not(title is None and author is None and publisher is None and \
@ -224,11 +246,10 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
if isbn is not None:
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
with MetadataSources(fetchers) as manager:
manager(title, author, publisher, isbn, verbose)
manager.join()
for fetcher in fetchers:
fetcher(title, author, publisher, isbn, verbose)
for fetcher in fetchers:
fetcher.join()
results = list(fetchers[0].results)
for fetcher in fetchers[1:]:
merge_results(results, fetcher.results)
@ -243,10 +264,9 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
def get_social_metadata(mi, verbose=0):
from calibre.customize.ui import metadata_sources
fetchers = list(metadata_sources(metadata_type='social'))
for fetcher in fetchers:
fetcher(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
for fetcher in fetchers:
fetcher.join()
with MetadataSources(fetchers) as manager:
manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
manager.join()
ratings, tags, comments = [], set([]), set([])
for fetcher in fetchers:
if fetcher.results:

View File

@ -4,13 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files
'''
import functools
import os
import re
import struct
import textwrap
import cStringIO
import sys
import functools, shutil, os, re, struct, textwrap, cStringIO, sys
try:
from PIL import Image as PILImage
@ -620,6 +614,16 @@ class MobiReader(object):
* opf.cover.split('/'))):
opf.cover = None
cover = opf.cover
if cover is not None:
cover = cover.replace('/', os.sep)
if os.path.exists(cover):
ncover = 'images'+os.sep+'calibre_cover.jpg'
if os.path.exists(ncover):
os.remove(ncover)
shutil.copyfile(cover, ncover)
opf.cover = ncover.replace(os.sep, '/')
manifest = [(htmlfile, 'application/xhtml+xml'),
(os.path.abspath('styles.css'), 'text/css')]
bp = os.path.dirname(htmlfile)

View File

@ -173,7 +173,8 @@ class EbookIterator(object):
plumber.opts.no_process = True
plumber.input_plugin.for_viewer = True
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
with plumber.input_plugin:
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)

View File

@ -13,7 +13,7 @@ from functools import partial
from calibre.ebooks import ConversionError, DRMError
from calibre.ptempfile import PersistentTemporaryFile
from calibre import isosx, iswindows, islinux
from calibre import isosx, iswindows, islinux, isfreebsd
from calibre import CurrentDir
PDFTOHTML = 'pdftohtml'
@ -23,7 +23,7 @@ if isosx and hasattr(sys, 'frameworks_dir'):
if iswindows and hasattr(sys, 'frozen'):
PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
if islinux and getattr(sys, 'frozen_path', False):
if (islinux or isfreebsd) and getattr(sys, 'frozen_path', False):
PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
def pdftohtml(output_dir, pdf_path, no_images):

View File

@ -142,7 +142,7 @@ class RTFMLizer(object):
def image_to_hexstring(self, data):
im = Image.open(cStringIO.StringIO(data))
data = cStringIO.StringIO()
im.save(data, 'JPEG')
im.convert('RGB').save(data, 'JPEG')
data = data.getvalue()
raw_hex = ''

View File

@ -12,7 +12,7 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500'
from calibre import islinux, iswindows, isosx
from calibre import islinux, iswindows, isosx, isfreebsd
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.utils.localization import set_qt_translator
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
@ -579,7 +579,7 @@ _store_app = None
def is_ok_to_use_qt():
global gui_thread, _store_app
if islinux and ':' not in os.environ.get('DISPLAY', ''):
if (islinux or isfreebsd) and ':' not in os.environ.get('DISPLAY', ''):
return False
if _store_app is None and QApplication.instance() is None:
_store_app = QApplication([])

View File

@ -25,7 +25,7 @@ from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, \
authors_to_string, check_isbn
from calibre.ebooks.metadata.library_thing import cover_from_isbn
from calibre import islinux
from calibre import islinux, isfreebsd
from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.config import prefs, tweaks
from calibre.utils.date import qt_to_dt
@ -389,8 +389,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
height_of_rest = self.frameGeometry().height() - self.cover.height()
width_of_rest = self.frameGeometry().width() - self.cover.width()
ag = QCoreApplication.instance().desktop().availableGeometry(self)
self.cover.MAX_HEIGHT = ag.height()-(25 if islinux else 0)-height_of_rest
self.cover.MAX_WIDTH = ag.width()-(25 if islinux else 0)-width_of_rest
self.cover.MAX_HEIGHT = ag.height()-(25 if (islinux or isfreebsd) else 0)-height_of_rest
self.cover.MAX_WIDTH = ag.width()-(25 if (islinux or isfreebsd) else 0)-width_of_rest
if cover:
pm = QPixmap()
pm.loadFromData(cover)

View File

@ -5,7 +5,7 @@ import sys, logging, os, traceback, time
from PyQt4.QtGui import QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon
from PyQt4.QtCore import Qt, QObject, SIGNAL, QCoreApplication, QThread
from calibre import __appname__, setup_cli_handlers, islinux
from calibre import __appname__, setup_cli_handlers, islinux, isfreebsd
from calibre.ebooks.lrf.lrfparser import LRFDocument
from calibre.gui2 import ORG_NAME, APP_UID, error_dialog, \
@ -258,7 +258,7 @@ def file_renderer(stream, opts, parent=None, logger=None):
level = logging.DEBUG if opts.verbose else logging.INFO
logger = logging.getLogger('lrfviewer')
setup_cli_handlers(logger, level)
if islinux:
if islinux or isfreebsd:
try: # Set lrfviewer as the default for LRF files for this user
from subprocess import call
call('xdg-mime default calibre-lrfviewer.desktop application/lrf', shell=True)
@ -307,7 +307,7 @@ def main(args=sys.argv, logger=None):
if hasattr(opts, 'help'):
parser.print_help()
return 1
pid = os.fork() if islinux else -1
pid = os.fork() if (islinux or isfreebsd) else -1
if pid <= 0:
app = Application(args)
app.setWindowIcon(QIcon(I('viewer.svg')))

View File

@ -20,7 +20,7 @@ from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
info_dialog, error_dialog
from calibre.ebooks.oeb.iterator import EbookIterator
from calibre.ebooks import DRMError
from calibre.constants import islinux
from calibre.constants import islinux, isfreebsd
from calibre.utils.config import Config, StringConfig, dynamic
from calibre.gui2.search_box import SearchBox2
from calibre.ebooks.metadata import MetaInformation
@ -686,7 +686,7 @@ View an ebook.
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
pid = os.fork() if False and islinux else -1
pid = os.fork() if False and (islinux or isfreebsd) else -1
if pid <= 0:
app = Application(args)
app.setWindowIcon(QIcon(I('viewer.svg')))

View File

@ -1294,7 +1294,7 @@ class EPUB_MOBI(CatalogPlugin):
if entry_type:
user_notes[location] = dict(type=entry_type, id=self.id,
text=data[eo+8:eo+8+rec_len].decode('utf-16-be'))
text=data[eo+8:eo+8+rec_len].decode('utf-16-be'))
#print " %2d: %s %s" % (current_entry, entry_type,'at %d' % location if location else '')
#if current_block == 'text_block':
#self.textdump(text)
@ -1307,12 +1307,17 @@ class EPUB_MOBI(CatalogPlugin):
while sig == 'BKMK':
# Fix start location for Highlights using BKMK data
end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
#print "looking for end_loc %d in BKMK" % end_loc
if end_loc in user_notes and user_notes[end_loc]['type'] != 'Note':
start, = unpack('>I', data[eo+8:eo+12])
user_notes[start] = user_notes[end_loc]
user_notes.pop(end_loc)
#print "changing start location of %d to %d" % (end_loc,start)
else:
# If a bookmark coincides with a user annotation, the locs could
# be the same - cheat by nudging -1
# Skip bookmark for last_read_location
if end_loc != self.last_read_location:
user_notes[end_loc - 1] = dict(type='Bookmark',id=self.id,text=None)
rec_len, = unpack('>I', data[eo+4:eo+8])
eo += rec_len + 8
sig = data[eo:eo+4]
@ -3361,7 +3366,7 @@ class EPUB_MOBI(CatalogPlugin):
self.ncxSoup = ncx_soup
def updateLibraryComments(self):
# Push user notes back to library
# Append user notes to library book['comments'], catalog book['description']
from calibre.library.cli import send_message as calibre_send_message
if self.bookmarked_books:
@ -3395,22 +3400,30 @@ class EPUB_MOBI(CatalogPlugin):
user_notes = self.bookmarked_books[id][0].user_notes
annotations = []
'''
spanTag = Tag(ka_soup, 'span')
spanTag['style'] = 'font-style:italic;font-weight:bold'
spanTag.insert(0,NavigableString("Kindle Annotations"))
divTag.insert(dtc, spanTag)
dtc += 1
divTag.insert(dtc, Tag(ka_soup,'br'))
dtc += 1
'''
if False:
spanTag = Tag(ka_soup, 'span')
spanTag['style'] = 'font-style:italic;font-weight:bold;text-align:right'
spanTag.insert(0,NavigableString("Kindle Annotations"))
divTag.insert(dtc, spanTag)
dtc += 1
divTag.insert(dtc, Tag(ka_soup,'br'))
dtc += 1
# Add the annotations sorted by location
# Italicize highlighted text
for location in sorted(user_notes):
annotations.append('<b>Location %d &bull; %s</b><br />%s<br />' % \
self.magicKindleLocationCalculator(location),
user_notes[location]['type'],
user_notes[location]['text'])
if user_notes[location]['text']:
annotations.append('<b>Location %d &bull; %s</b><br />%s<br />' % \
(self.magicKindleLocationCalculator(location),
user_notes[location]['type'],
user_notes[location]['text'] if \
user_notes[location]['type'] == 'Note' else \
'<i>%s</i>' % user_notes[location]['text']))
else:
annotations.append('<b>Location %d &bull; %s</b><br />' % \
(self.magicKindleLocationCalculator(location),
user_notes[location]['type']))
for annotation in annotations:
divTag.insert(dtc, annotation)
dtc += 1

View File

@ -196,6 +196,8 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
for x in data:
if isinstance(x['fmt_epub'], unicode):
x['fmt_epub'] = x['fmt_epub'].encode('utf-8')
if isinstance(x['cover'], unicode):
x['cover'] = x['cover'].encode('utf-8')
template = MarkupTemplate(STANZA_TEMPLATE)
return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')

View File

@ -263,7 +263,7 @@ class ResultCache(SearchQueryParser):
if item[loc].strip() != '':
continue
matches.add(item[0])
break
continue
continue ### item is empty. No possible matches below
if q == 'true':

View File

@ -7,6 +7,7 @@ import sys, os, shutil, cPickle, textwrap, stat
from subprocess import check_call
from calibre import __appname__, prints
from calibre.constants import islinux, isfreebsd
entry_points = {
@ -128,20 +129,23 @@ class PostInstall:
self.icon_resources = []
self.menu_resources = []
self.mime_resources = []
self.setup_completion()
self.setup_udev_rules()
if islinux:
self.setup_completion()
self.setup_udev_rules()
self.install_man_pages()
self.setup_desktop_integration()
if islinux:
self.setup_desktop_integration()
self.create_uninstaller()
from calibre.utils.config import config_dir
if os.path.exists(config_dir):
os.chdir(config_dir)
for f in os.listdir('.'):
if os.stat(f).st_uid == 0:
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
if os.stat(config_dir).st_uid == 0:
os.rmdir(config_dir)
if islinux:
for f in os.listdir('.'):
if os.stat(f).st_uid == 0:
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
if os.stat(config_dir).st_uid == 0:
os.rmdir(config_dir)
if warn is None and self.warnings:
self.info('There were %d warnings'%len(self.warnings))
@ -318,7 +322,10 @@ class PostInstall:
def install_man_pages(self):
try:
from calibre.utils.help2man import create_man_page
manpath = os.path.join(self.opts.staging_sharedir, 'man/man1')
if isfreebsd:
manpath = os.path.join(self.opts.staging_root, 'man/man1')
else:
manpath = os.path.join(self.opts.staging_sharedir, 'man/man1')
if not os.path.exists(manpath):
os.makedirs(manpath)
self.info('Installing MAN pages...')
@ -331,7 +338,10 @@ class PostInstall:
continue
parser = parser()
raw = create_man_page(prog, parser)
manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
if isfreebsd:
manfile = os.path.join(manpath, prog+'.1')
else:
manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
self.info('\tInstalling MAN page for', prog)
open(manfile, 'wb').write(raw)
self.manifest.append(manfile)

View File

@ -204,6 +204,10 @@ terminal. For example, you can start the GUI from the terminal as::
calibre-debug -g
Similarly, you can start the ebook-viewer as::
calibre-debug -w /path/to/file/to/be/viewed
Executing arbitrary scripts in the calibre python environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -0,0 +1,34 @@
## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
## pychm is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
## You should have received a copy of the GNU General Public
## License along with this program; see the file COPYING. If not,
## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
## Boston, MA 02111-1307, USA
## $Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $
'''
chm - A package to manipulate CHM files
The chm package provides four modules: chm, chmlib, extra and
_chmlib. _chmlib and chmlib are very low level libraries generated
from SWIG interface files, and are simple wrappers around the API
defined by the C library chmlib.
The extra module adds full-text search support.
The chm module provides some higher level classes to simplify
access to the CHM files information.
'''
__all__ = ["chm", "chmlib", "_chmlib", "extra"]
__version__ = "0.8.4"
__revision__ = "$Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $"

View File

@ -0,0 +1,512 @@
## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
## Based on code by:
## Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
## pychm is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
## You should have received a copy of the GNU General Public
## License along with this program; see the file COPYING. If not,
## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
## Boston, MA 02111-1307, USA
## $Id: chm.py,v 1.12 2006/08/07 12:31:51 rubensr Exp $
'''
chm - A high-level front end for the chmlib python module.
The chm module provides high level access to the functionality
included in chmlib. It encapsulates functions in the CHMFile class, and
provides some additional features, such as the ability to obtain
the contents tree of a CHM archive.
'''
import array
import string
import sys
import calibre.utils.chm.chmlib as chmlib
from calibre.constants import plugins
extra, extra_err = plugins['chm_extra']
if extra_err:
raise RuntimeError('Failed to load chm.extra: '+extra_err)
# Maps a numeric charset id to the name of the Python codec used for it.
# CHMFile.GetEncoding looks up the third comma-separated field of the
# archive's encoding string in this table. The trailing comment on each
# entry names the corresponding *_CHARSET constant. A value of None means
# no codec is mapped for that charset, so GetEncoding returns None for it.
charset_table = {
0 : 'iso8859_1', # ANSI_CHARSET
238 : 'iso8859_2', # EASTEUROPE_CHARSET
178 : 'iso8859_6', # ARABIC_CHARSET
161 : 'iso8859_7', # GREEK_CHARSET
177 : 'iso8859_8', # HEBREW_CHARSET
162 : 'iso8859_9', # TURKISH_CHARSET
222 : 'iso8859_11', # THAI_CHARSET - hmm not in python 2.2...
186 : 'iso8859_13', # BALTIC_CHARSET
204 : 'cp1251', # RUSSIAN_CHARSET
255 : 'cp437', # OEM_CHARSET
128 : 'cp932', # SHIFTJIS_CHARSET
134 : 'cp936', # GB2312_CHARSET
129 : 'cp949', # HANGUL_CHARSET
136 : 'cp950', # CHINESEBIG5_CHARSET
1 : None, # DEFAULT_CHARSET
2 : None, # SYMBOL_CHARSET
130 : None, # JOHAB_CHARSET
163 : None, # VIETNAMESE_CHARSET
77 : None, # MAC_CHARSET
}
# Maps a locale id (the value CHMFile stores in self.lcid) to a 3-tuple:
#   (codec name or None, language/locale name, region or script group).
# CHMFile.GetLCID returns the tuple for the archive's locale id, or None
# when the id is not listed here. The first element uses the same codec
# names as charset_table; None means no codec is mapped for that locale.
locale_table = {
0x0436 : ('iso8859_1', "Afrikaans", "Western Europe & US"),
0x041c : ('iso8859_2', "Albanian", "Central Europe"),
0x0401 : ('iso8859_6', "Arabic_Saudi_Arabia", "Arabic"),
0x0801 : ('iso8859_6', "Arabic_Iraq", "Arabic"),
0x0c01 : ('iso8859_6', "Arabic_Egypt", "Arabic"),
0x1001 : ('iso8859_6', "Arabic_Libya", "Arabic"),
0x1401 : ('iso8859_6', "Arabic_Algeria", "Arabic"),
0x1801 : ('iso8859_6', "Arabic_Morocco", "Arabic"),
0x1c01 : ('iso8859_6', "Arabic_Tunisia", "Arabic"),
0x2001 : ('iso8859_6', "Arabic_Oman", "Arabic"),
0x2401 : ('iso8859_6', "Arabic_Yemen", "Arabic"),
0x2801 : ('iso8859_6', "Arabic_Syria", "Arabic"),
0x2c01 : ('iso8859_6', "Arabic_Jordan", "Arabic"),
0x3001 : ('iso8859_6', "Arabic_Lebanon", "Arabic"),
0x3401 : ('iso8859_6', "Arabic_Kuwait", "Arabic"),
0x3801 : ('iso8859_6', "Arabic_UAE", "Arabic"),
0x3c01 : ('iso8859_6', "Arabic_Bahrain", "Arabic"),
0x4001 : ('iso8859_6', "Arabic_Qatar", "Arabic"),
0x042b : (None, "Armenian","Armenian"),
0x042c : ('iso8859_9', "Azeri_Latin", "Turkish"),
0x082c : ('cp1251', "Azeri_Cyrillic", "Cyrillic"),
0x042d : ('iso8859_1', "Basque", "Western Europe & US"),
0x0423 : ('cp1251', "Belarusian", "Cyrillic"),
0x0402 : ('cp1251', "Bulgarian", "Cyrillic"),
0x0403 : ('iso8859_1', "Catalan", "Western Europe & US"),
0x0404 : ('cp950', "Chinese_Taiwan", "Traditional Chinese"),
0x0804 : ('cp936', "Chinese_PRC", "Simplified Chinese"),
0x0c04 : ('cp950', "Chinese_Hong_Kong", "Traditional Chinese"),
0x1004 : ('cp936', "Chinese_Singapore", "Simplified Chinese"),
0x1404 : ('cp950', "Chinese_Macau", "Traditional Chinese"),
0x041a : ('iso8859_2', "Croatian", "Central Europe"),
0x0405 : ('iso8859_2', "Czech", "Central Europe"),
0x0406 : ('iso8859_1', "Danish", "Western Europe & US"),
0x0413 : ('iso8859_1', "Dutch_Standard", "Western Europe & US"),
0x0813 : ('iso8859_1', "Dutch_Belgian", "Western Europe & US"),
0x0409 : ('iso8859_1', "English_United_States", "Western Europe & US"),
0x0809 : ('iso8859_1', "English_United_Kingdom", "Western Europe & US"),
0x0c09 : ('iso8859_1', "English_Australian", "Western Europe & US"),
0x1009 : ('iso8859_1', "English_Canadian", "Western Europe & US"),
0x1409 : ('iso8859_1', "English_New_Zealand", "Western Europe & US"),
0x1809 : ('iso8859_1', "English_Irish", "Western Europe & US"),
0x1c09 : ('iso8859_1', "English_South_Africa", "Western Europe & US"),
0x2009 : ('iso8859_1', "English_Jamaica", "Western Europe & US"),
0x2409 : ('iso8859_1', "English_Caribbean", "Western Europe & US"),
0x2809 : ('iso8859_1', "English_Belize", "Western Europe & US"),
0x2c09 : ('iso8859_1', "English_Trinidad", "Western Europe & US"),
0x3009 : ('iso8859_1', "English_Zimbabwe", "Western Europe & US"),
0x3409 : ('iso8859_1', "English_Philippines", "Western Europe & US"),
0x0425 : ('iso8859_13',"Estonian", "Baltic",),
0x0438 : ('iso8859_1', "Faeroese", "Western Europe & US"),
0x0429 : ('iso8859_6', "Farsi", "Arabic"),
0x040b : ('iso8859_1', "Finnish", "Western Europe & US"),
0x040c : ('iso8859_1', "French_Standard", "Western Europe & US"),
0x080c : ('iso8859_1', "French_Belgian", "Western Europe & US"),
0x0c0c : ('iso8859_1', "French_Canadian", "Western Europe & US"),
0x100c : ('iso8859_1', "French_Swiss", "Western Europe & US"),
0x140c : ('iso8859_1', "French_Luxembourg", "Western Europe & US"),
0x180c : ('iso8859_1', "French_Monaco", "Western Europe & US"),
0x0437 : (None, "Georgian", "Georgian"),
0x0407 : ('iso8859_1', "German_Standard", "Western Europe & US"),
0x0807 : ('iso8859_1', "German_Swiss", "Western Europe & US"),
0x0c07 : ('iso8859_1', "German_Austrian", "Western Europe & US"),
0x1007 : ('iso8859_1', "German_Luxembourg", "Western Europe & US"),
0x1407 : ('iso8859_1', "German_Liechtenstein", "Western Europe & US"),
0x0408 : ('iso8859_7', "Greek", "Greek"),
0x040d : ('iso8859_8', "Hebrew", "Hebrew"),
0x0439 : (None, "Hindi", "Indic"),
0x040e : ('iso8859_2', "Hungarian", "Central Europe"),
0x040f : ('iso8859_1', "Icelandic", "Western Europe & US"),
0x0421 : ('iso8859_1', "Indonesian", "Western Europe & US"),
0x0410 : ('iso8859_1', "Italian_Standard", "Western Europe & US"),
0x0810 : ('iso8859_1', "Italian_Swiss", "Western Europe & US"),
0x0411 : ('cp932', "Japanese", "Japanese"),
0x043f : ('cp1251', "Kazakh", "Cyrillic"),
0x0457 : (None, "Konkani", "Indic"),
0x0412 : ('cp949', "Korean", "Korean"),
0x0426 : ('iso8859_13',"Latvian", "Baltic",),
0x0427 : ('iso8859_13',"Lithuanian", "Baltic",),
0x042f : ('cp1251', "Macedonian", "Cyrillic"),
0x043e : ('iso8859_1', "Malay_Malaysia", "Western Europe & US"),
0x083e : ('iso8859_1', "Malay_Brunei_Darussalam", "Western Europe & US"),
0x044e : (None, "Marathi", "Indic"),
0x0414 : ('iso8859_1', "Norwegian_Bokmal", "Western Europe & US"),
0x0814 : ('iso8859_1', "Norwegian_Nynorsk", "Western Europe & US"),
0x0415 : ('iso8859_2', "Polish", "Central Europe"),
0x0416 : ('iso8859_1', "Portuguese_Brazilian", "Western Europe & US"),
0x0816 : ('iso8859_1', "Portuguese_Standard", "Western Europe & US"),
0x0418 : ('iso8859_2', "Romanian", "Central Europe"),
0x0419 : ('cp1251', "Russian", "Cyrillic"),
0x044f : (None, "Sanskrit", "Indic"),
0x081a : ('iso8859_2', "Serbian_Latin", "Central Europe"),
0x0c1a : ('cp1251', "Serbian_Cyrillic", "Cyrillic"),
0x041b : ('iso8859_2', "Slovak", "Central Europe"),
0x0424 : ('iso8859_2', "Slovenian", "Central Europe"),
0x040a : ('iso8859_1', "Spanish_Trad_Sort", "Western Europe & US"),
0x080a : ('iso8859_1', "Spanish_Mexican", "Western Europe & US"),
0x0c0a : ('iso8859_1', "Spanish_Modern_Sort", "Western Europe & US"),
0x100a : ('iso8859_1', "Spanish_Guatemala", "Western Europe & US"),
0x140a : ('iso8859_1', "Spanish_Costa_Rica", "Western Europe & US"),
0x180a : ('iso8859_1', "Spanish_Panama", "Western Europe & US"),
0x1c0a : ('iso8859_1', "Spanish_Dominican_Repub", "Western Europe & US"),
0x200a : ('iso8859_1', "Spanish_Venezuela", "Western Europe & US"),
0x240a : ('iso8859_1', "Spanish_Colombia", "Western Europe & US"),
0x280a : ('iso8859_1', "Spanish_Peru", "Western Europe & US"),
0x2c0a : ('iso8859_1', "Spanish_Argentina", "Western Europe & US"),
0x300a : ('iso8859_1', "Spanish_Ecuador", "Western Europe & US"),
0x340a : ('iso8859_1', "Spanish_Chile", "Western Europe & US"),
0x380a : ('iso8859_1', "Spanish_Uruguay", "Western Europe & US"),
0x3c0a : ('iso8859_1', "Spanish_Paraguay", "Western Europe & US"),
0x400a : ('iso8859_1', "Spanish_Bolivia", "Western Europe & US"),
0x440a : ('iso8859_1', "Spanish_El_Salvador", "Western Europe & US"),
0x480a : ('iso8859_1', "Spanish_Honduras", "Western Europe & US"),
0x4c0a : ('iso8859_1', "Spanish_Nicaragua", "Western Europe & US"),
0x500a : ('iso8859_1', "Spanish_Puerto_Rico", "Western Europe & US"),
0x0441 : ('iso8859_1', "Swahili", "Western Europe & US"),
0x041d : ('iso8859_1', "Swedish", "Western Europe & US"),
0x081d : ('iso8859_1', "Swedish_Finland", "Western Europe & US"),
0x0449 : (None, "Tamil", "Indic"),
0x0444 : ('cp1251', "Tatar", "Cyrillic"),
0x041e : ('iso8859_11',"Thai", "Thai"),
0x041f : ('iso8859_9', "Turkish", "Turkish"),
0x0422 : ('cp1251', "Ukrainian", "Cyrillic"),
0x0420 : ('iso8859_6', "Urdu", "Arabic"),
0x0443 : ('iso8859_9', "Uzbek_Latin", "Turkish"),
0x0843 : ('cp1251', "Uzbek_Cyrillic", "Cyrillic"),
0x042a : (None, "Vietnamese", "Vietnamese")
}
class CHMFile:
    "A class to manage access to CHM files."

    # Class-level defaults; LoadCHM/GetArchiveInfo overwrite them per
    # instance and CloseCHM restores most of them.
    filename = ""
    file = None          # handle returned by chmlib.chm_open, None when closed
    title = ""
    home = "/"
    index = None         # path of the index file inside the archive
    topics = None        # path of the topics (contents) file inside the archive
    encoding = None      # raw encoding string read from /#SYSTEM
    lcid = None          # locale id, key into locale_table
    binaryindex = None

    def __init__(self):
        self.searchable = 0

    def LoadCHM(self, archiveName):
        '''Loads a CHM archive.

        Any archive that is currently open is closed first and the
        per-archive state is reset. This function will also call
        GetArchiveInfo to obtain information such as the index file name
        and the topics file. It returns 1 on success, and 0 if it fails.
        '''
        self.CloseCHM()

        self.file = chmlib.chm_open(archiveName)
        if self.file is None:
            return 0

        self.filename = archiveName
        self.GetArchiveInfo()

        return 1

    def CloseCHM(self):
        '''Closes the CHM archive.

        This function will close the CHM file, if it is open. All variables
        are also reset. It is safe to call when no archive is open.
        '''
        # The handle, not the filename, says whether something is open:
        # filename is never None, so testing it would always pass and hand
        # a None handle to chm_close.
        if self.file is not None:
            chmlib.chm_close(self.file)
        self.file = None
        self.filename = ''
        self.title = ""
        self.home = "/"
        self.index = None
        self.topics = None
        self.encoding = None

    def GetArchiveInfo(self):
        '''Obtains information on CHM archive.

        This function checks the /#SYSTEM file inside the CHM archive to
        obtain the index, home page, topics, encoding and title. It is called
        from LoadCHM. Anything still missing afterwards is looked up through
        GetWindowsInfo, and the locale id falls back to extra.get_lcid.

        Returns 1 on success and 0 if /#SYSTEM is missing or empty.
        '''
        # Full-text search is disabled: extra.is_searchable(self.file)
        # crashed, so the archive is always reported as not searchable.
        self.searchable = False
        self.lcid = None

        result, ui = chmlib.chm_resolve_object(self.file, '/#SYSTEM')
        if result != chmlib.CHM_RESOLVE_SUCCESS:
            sys.stderr.write('GetArchiveInfo: #SYSTEM does not exist\n')
            return 0

        # Reading starts at offset 4, i.e. the first four bytes of /#SYSTEM
        # are skipped. Offsets are passed as long, as RetrieveObject does
        # (presumably what the SWIG wrapper expects -- TODO confirm).
        size, text = chmlib.chm_retrieve_object(self.file, ui, long(4),
                                                ui.length)
        if size == 0:
            sys.stderr.write('GetArchiveInfo: file size = 0\n')
            return 0

        buff = array.array('B', text)
        end = min(size, len(buff))
        pos = 0

        # Each entry is: 2-byte little-endian code, 2-byte little-endian
        # length, then `length` bytes of data. Stop as soon as a complete
        # entry header no longer fits, so truncated data cannot index past
        # the end of the buffer.
        while pos + 4 <= end:
            code = buff[pos] + (buff[pos+1] * 256)
            length = buff[pos+2] + (buff[pos+3] * 256)
            pos += 4
            # String payloads drop their last byte (the terminator).
            value = text[pos:pos+length-1]

            if code == 0:
                self.topics = '/' + value
            elif code == 1:
                self.index = '/' + value
            elif code == 2:
                self.home = '/' + value
            elif code == 3:
                self.title = value
            elif code == 4:
                # Locale id: first two data bytes, little-endian.
                if pos + 2 <= end:
                    self.lcid = buff[pos] + (buff[pos+1] * 256)
            elif code == 6:
                # Base name from which the topics (.hhc) and index (.hhk)
                # paths are guessed, but only while no topics file is known.
                if not self.topics:
                    hhc = '/' + value + '.hhc'
                    hhk = '/' + value + '.hhk'
                    res_hhc, _ui = chmlib.chm_resolve_object(self.file, hhc)
                    res_hhk, _ui = chmlib.chm_resolve_object(self.file, hhk)
                    if res_hhc == chmlib.CHM_RESOLVE_SUCCESS:
                        self.topics = hhc
                    if (not self.index) and \
                           (res_hhk == chmlib.CHM_RESOLVE_SUCCESS):
                        self.index = hhk
            elif code == 16:
                self.encoding = value
            # Entries with any other code are skipped.

            pos += length

        self.GetWindowsInfo()

        if not self.lcid:
            self.lcid = extra.get_lcid (self.file)

        return 1

    def _ReadWholeObject(self, path, caller):
        '''Internal method.

        Resolves path inside the archive and returns its full contents, or
        None if path is empty/None, cannot be resolved, or is empty. caller
        is the public method name used in the diagnostic message.
        '''
        if not path:
            return None

        res, ui = chmlib.chm_resolve_object(self.file, path)
        if res != chmlib.CHM_RESOLVE_SUCCESS:
            return None

        size, text = chmlib.chm_retrieve_object(self.file, ui, long(0),
                                                ui.length)
        if size == 0:
            sys.stderr.write('%s: file size = 0\n' % caller)
            return None
        return text

    def GetTopicsTree(self):
        '''Reads and returns the topics tree.

        This auxiliary function reads and returns the topics tree file
        contents for the CHM archive, or None if there is no topics file
        or it cannot be read.
        '''
        return self._ReadWholeObject(self.topics, 'GetTopicsTree')

    def GetIndex(self):
        '''Reads and returns the index tree.

        This auxiliary function reads and returns the index tree file
        contents for the CHM archive, or None if there is no index file
        or it cannot be read.
        '''
        return self._ReadWholeObject(self.index, 'GetIndex')

    def ResolveObject(self, document):
        '''Tries to locate a document in the archive.

        This function tries to locate the document inside the archive. It
        returns a tuple where the first element is zero if the function
        was successful, and the second is the UnitInfo for that document.
        The UnitInfo is used to retrieve the document contents.
        If no archive is open, (1, None) is returned.
        '''
        if not self.file:
            return (1, None)
        return chmlib.chm_resolve_object(self.file, document)

    def RetrieveObject(self, ui, start = -1, length = -1):
        '''Retrieves the contents of a document.

        This function takes a UnitInfo and two optional arguments, the first
        being the start address and the second is the length. These define
        the amount of data to be read from the archive. A start of -1 means
        the beginning of the document and a length of -1 means ui.length.
        Returns the result of chmlib.chm_retrieve_object, or (0, '') when
        no archive is open or ui is empty.
        '''
        if not (self.file and ui):
            return (0, '')

        count = ui.length if length == -1 else length
        offset = long(0) if start == -1 else long(start)
        return chmlib.chm_retrieve_object(self.file, ui, offset, count)

    def Search(self, text, wholewords=0, titleonly=0):
        '''Performs full-text search on the archive.

        The first parameter is the word to look for, the second
        indicates if the search should be for whole words only, and
        the third parameter indicates if the search should be
        restricted to page titles.
        This method will return a tuple, the first item
        indicating if the search results were partial, and the second
        item being a dictionary containing the results. None is returned
        when text is empty or no archive is open.'''
        if text and self.file:
            return extra.search (self.file, text, wholewords,
                                 titleonly)
        return None

    def IsSearchable(self):
        '''Indicates if the full-text search is available for this
        archive - this flag is updated when GetArchiveInfo is called'''
        return self.searchable

    def GetEncoding(self):
        '''Returns a string that can be used with the codecs python package
        to encode or decode the files in the chm archive. If an error is
        found, or if it is not possible to find the encoding, None is
        returned.'''
        if self.encoding:
            vals = self.encoding.split(',')
            if len(vals) > 2:
                # The third field is the numeric charset id; a field that is
                # not a number is an error and yields None like an unknown id.
                try:
                    charset = int(vals[2])
                except ValueError:
                    return None
                return charset_table.get(charset)
        return None

    def GetLCID(self):
        '''Returns the locale_table entry for the archive Locale ID, or
        None if the id is unknown.'''
        return locale_table.get(self.lcid)

    def GetDWORD(self, buff, idx=0):
        '''Internal method.

        Reads a little-endian double word (4 bytes) from a buffer of byte
        values, starting at idx. The value 0xFFFFFFFF is reported as 0.
        '''
        result = buff[idx] + (buff[idx+1]<<8) + (buff[idx+2]<<16) + \
                 (buff[idx+3]<<24)

        if result == 0xFFFFFFFF:
            result = 0

        return result

    def GetString(self, text, idx):
        '''Internal method.

        Retrieves the NUL-terminated string starting at idx from the
        #STRINGS buffer. If no terminator follows, the rest of the buffer
        is returned.
        '''
        end = text.find('\x00', idx)
        if end == -1:
            # find() signals "not found" with -1; slicing with it would
            # silently drop the last character.
            return text[idx:]
        return text[idx:end]

    def GetWindowsInfo(self):
        '''Gets information from the #WINDOWS file.

        Checks the #WINDOWS file to see if it has any info that was
        not found in #SYSTEM (topics, index or default page).

        Returns None on success, or a negative code identifying the step
        that failed: -1 no #WINDOWS, -2 header too short, -3 no entries,
        -4 entry too short, -5 no #STRINGS, -6 #STRINGS empty.
        '''
        result, ui = chmlib.chm_resolve_object(self.file, '/#WINDOWS')
        if result != chmlib.CHM_RESOLVE_SUCCESS:
            return -1

        # Header: number of entries and size of one entry, 4 bytes each.
        size, text = chmlib.chm_retrieve_object(self.file, ui, long(0), 8)
        if size < 8:
            return -2

        buff = array.array('B', text)
        num_entries = self.GetDWORD(buff, 0)
        entry_size = self.GetDWORD(buff, 4)

        if num_entries < 1:
            return -3

        # Only the first entry is examined.
        size, text = chmlib.chm_retrieve_object(self.file, ui, long(8),
                                                entry_size)
        if size < entry_size:
            return -4

        buff = array.array('B', text)
        # The three offsets below are read from 0x60..0x6b, so the entry
        # must be at least 0x6c bytes long.
        if len(buff) < 0x6c:
            return -4
        toc_index = self.GetDWORD(buff, 0x60)
        idx_index = self.GetDWORD(buff, 0x64)
        dft_index = self.GetDWORD(buff, 0x68)

        result, ui = chmlib.chm_resolve_object(self.file, '/#STRINGS')
        if result != chmlib.CHM_RESOLVE_SUCCESS:
            return -5

        size, text = chmlib.chm_retrieve_object(self.file, ui, long(0),
                                                ui.length)
        if size == 0:
            return -6

        # The offsets index into #STRINGS; values found in #SYSTEM win for
        # topics and index, while a non-zero default page always replaces
        # home.
        if not self.topics:
            self.topics = self.GetString(text, toc_index)
            if not self.topics.startswith("/"):
                self.topics = "/" + self.topics

        if not self.index:
            self.index = self.GetString(text, idx_index)
            if not self.index.startswith("/"):
                self.index = "/" + self.index

        if dft_index != 0:
            self.home = self.GetString(text, dft_index)
            if not self.home.startswith("/"):
                self.home = "/" + self.home

View File

@ -0,0 +1,100 @@
# This file was created automatically by SWIG.
# Don't modify this file, modify the SWIG interface instead.
# This file is compatible with both classic and new-style classes.
from calibre.constants import plugins
_chmlib, chmlib_err = plugins['chmlib']
if chmlib_err:
raise RuntimeError('Failed to load chmlib: '+chmlib_err)
def _swig_setattr(self, class_type, name, value):
    """Attribute-assignment hook shared by the SWIG proxy classes.

    Assigning another ``class_type`` instance to ``this`` adopts that
    instance's ``this`` (and ``thisown`` if present) and removes ``thisown``
    from the source object. Otherwise a setter registered under ``name`` in
    ``class_type.__swig_setmethods__`` is called and its result returned;
    without one, the value is stored directly in the instance dictionary.
    """
    if name == "this" and isinstance(value, class_type):
        self.__dict__[name] = value.this
        if hasattr(value, "thisown"):
            self.__dict__["thisown"] = value.thisown
        del value.thisown
        return

    setter = class_type.__swig_setmethods__.get(name, None)
    if setter:
        return setter(self, value)
    self.__dict__[name] = value
def _swig_getattr(self, class_type, name):
    """Attribute-lookup hook shared by the SWIG proxy classes.

    Calls the getter registered under ``name`` in
    ``class_type.__swig_getmethods__`` and returns its result; raises
    AttributeError(name) when no getter is registered.
    """
    getter = class_type.__swig_getmethods__.get(name, None)
    if not getter:
        raise AttributeError(name)
    return getter(self)
import types
# Pick the base class for the proxy classes: types.ObjectType when the
# running interpreter provides it (_newclass = 1), otherwise an empty
# fallback class (_newclass = 0).
try:
    _object = types.ObjectType
except AttributeError:
    class _object:
        pass
    _newclass = 0
else:
    _newclass = 1
# Constants re-exported unchanged from the native _chmlib plugin.
CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
class chmUnitInfo(_object):
    """Proxy for the native chmUnitInfo object.

    The fields ``start``, ``length``, ``space`` and ``path`` are forwarded
    to the matching ``_chmlib.chmUnitInfo_<field>_get`` / ``_set``
    functions.
    """
    __swig_setmethods__ = {
        "start": _chmlib.chmUnitInfo_start_set,
        "length": _chmlib.chmUnitInfo_length_set,
        "space": _chmlib.chmUnitInfo_space_set,
        "path": _chmlib.chmUnitInfo_path_set,
    }
    __swig_getmethods__ = {
        "start": _chmlib.chmUnitInfo_start_get,
        "length": _chmlib.chmUnitInfo_length_get,
        "space": _chmlib.chmUnitInfo_space_get,
        "path": _chmlib.chmUnitInfo_path_get,
    }

    # Attribute access goes through the shared module-level helpers.
    __setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
    __getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)

    # With a new-style base class the fields are also exposed as properties.
    if _newclass:
        start = property(_chmlib.chmUnitInfo_start_get,
                         _chmlib.chmUnitInfo_start_set)
        length = property(_chmlib.chmUnitInfo_length_get,
                          _chmlib.chmUnitInfo_length_set)
        space = property(_chmlib.chmUnitInfo_space_get,
                         _chmlib.chmUnitInfo_space_set)
        path = property(_chmlib.chmUnitInfo_path_get,
                        _chmlib.chmUnitInfo_path_set)

    def __init__(self, *args):
        # Create the native object and mark this proxy as its owner.
        native = _chmlib.new_chmUnitInfo(*args)
        _swig_setattr(self, chmUnitInfo, 'this', native)
        _swig_setattr(self, chmUnitInfo, 'thisown', 1)

    def __del__(self, destroy= _chmlib.delete_chmUnitInfo):
        # destroy is bound as a default argument at definition time; any
        # error raised while destroying the native object is ignored.
        try:
            if self.thisown:
                destroy(self)
        except:
            pass

    def __repr__(self):
        return "<C chmUnitInfo instance at %s>" % (self.this,)
class chmUnitInfoPtr(chmUnitInfo):
    """chmUnitInfo proxy wrapped around an existing ``this`` value.

    Unlike chmUnitInfo.__init__, no native object is created here, and
    ``thisown`` is set to 0 unless the instance already has one.
    """

    def __init__(self, this):
        _swig_setattr(self, chmUnitInfo, 'this', this)
        already_owned = hasattr(self, "thisown")
        if not already_owned:
            _swig_setattr(self, chmUnitInfo, 'thisown', 0)
        # Kept exactly as generated: the attribute name passed here is the
        # class object itself, not a string.
        _swig_setattr(self, chmUnitInfo, self.__class__, chmUnitInfo)


# Register the pointer class with the native module.
_chmlib.chmUnitInfo_swigregister(chmUnitInfoPtr)
# Module-level aliases for the functions and constants of the native
# _chmlib plugin, so callers can use chmlib.<name> directly.

# Opening and closing an archive.
chm_open = _chmlib.chm_open
chm_close = _chmlib.chm_close
# Parameter setting.
CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
chm_set_param = _chmlib.chm_set_param
# Resolving a path inside the archive and reading the resolved object.
CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
chm_resolve_object = _chmlib.chm_resolve_object
chm_retrieve_object = _chmlib.chm_retrieve_object
# Enumeration constants and functions.
CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
chm_enumerate = _chmlib.chm_enumerate
chm_enumerate_dir = _chmlib.chm_enumerate_dir

View File

@ -0,0 +1,759 @@
/*
* extra.c - full-text search support for pychm
*
* Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*
* Author: Rubens Ramos <rubensr@users.sourceforge.net>
*
* Heavily based on work done by:
* Pabs <pabs@zip.to> - chmdeco
* Razvan Cojocaru <razvanco@gmx.net> - xCHM
*
*/
#include "chm_lib.h"
#ifdef __PYTHON__
#include "Python.h"
#else
#include <stdio.h>
#define PyObject void
#endif
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#include "stdint.h"
#define strcasecmp stricmp
#define strncasecmp strnicmp
#else
#include <inttypes.h>
#include <strings.h>
#endif
#if defined( _MSC_VER ) && !defined( __cplusplus )
# define inline __inline
#endif
#if defined(_WIN32) || defined(__WIN32__)
# if defined(_MSC_VER)
# if defined(STATIC_LINKED)
# define MODEXPORT(a) a
# define MODIMPORT(a) extern a
# else
# define MODEXPORT(a) __declspec(dllexport) a
# define MODIMPORT(a) extern a
# endif
# else
# if defined(__BORLANDC__)
# define MODEXPORT(a) a _export
# define MODIMPORT(a) a _export
# else
# define MODEXPORT(a) a
# define MODIMPORT(a) a
# endif
# endif
#else
# define MODEXPORT(a) a
# define MODIMPORT(a) a
#endif
#define false 0
#define true 1
#define FTS_HEADER_LEN 0x32
#define TOPICS_ENTRY_LEN 16
#define COMMON_BUF_LEN 1025
/* Free x and reset it to NULL.  Wrapped in do/while(0) so the macro expands
   to exactly one statement and stays correct under an unbraced if/else. */
#define FREE(x) do { free (x); (x) = NULL; } while (0)
/*
 * Decode a 16-bit unsigned integer stored least-significant byte first.
 *
 * b must point to at least 2 readable bytes.
 *
 * Declared static: a plain C99 `inline` definition provides no external
 * definition, so any call the compiler chooses not to inline would fail to
 * link.
 */
static inline uint16_t
get_uint16 (uint8_t* b) {
    return (uint16_t) ((unsigned int) b[0] |
                       ((unsigned int) b[1] << 8));
}
/*
 * Decode a 32-bit unsigned integer stored least-significant byte first.
 *
 * b must point to at least 4 readable bytes.
 *
 * Every byte is widened to uint32_t before shifting: a uint8_t operand is
 * promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
 * overflow into the sign bit.
 */
static inline uint32_t
get_uint32 (uint8_t* b) {
    return  (uint32_t) b[0]        |
           ((uint32_t) b[1] << 8)  |
           ((uint32_t) b[2] << 16) |
           ((uint32_t) b[3] << 24);
}
/*
 * Decode a 64-bit unsigned integer stored least-significant byte first.
 *
 * b must point to at least 8 readable bytes.
 *
 * All eight bytes are widened to uint64_t before shifting.  Previously the
 * low four bytes were combined as int, so a fourth byte >= 0x80 produced a
 * negative int that was sign-extended and set the upper 32 bits of the
 * result.
 */
static inline uint64_t
get_uint64 (uint8_t* b) {
    return  (uint64_t) b[0]        |
           ((uint64_t) b[1] << 8)  |
           ((uint64_t) b[2] << 16) |
           ((uint64_t) b[3] << 24) |
           ((uint64_t) b[4] << 32) |
           ((uint64_t) b[5] << 40) |
           ((uint64_t) b[6] << 48) |
           ((uint64_t) b[7] << 56);
}
/*
 * Decode a variable-length integer made of 7-bit groups.
 *
 * Each byte contributes its low 7 bits; the first byte supplies the lowest
 * 7 bits of the result, the next byte the following 7 bits, and so on.  A
 * set high bit (0x80) means another byte follows.
 *
 * buffer  start of the encoded value; the caller must guarantee that the
 *         terminating byte (high bit clear) lies inside the buffer.
 * length  receives the number of bytes consumed.
 *
 * Returns the decoded value.  Groups that would land above bit 63 are
 * consumed but ignored.
 */
static inline uint64_t
be_encint (unsigned char *buffer, size_t *length)
{
    uint64_t result = 0;
    int shift = 0;

    *length = 0;
    do {
        /* Widen before shifting: shifting the promoted int by 28 or more
           overflows it, and by 32 or more is undefined. */
        if (shift < 64)
            result |= (uint64_t) (*buffer & 0x7f) << shift;
        shift += 7;
        *length = *length + 1;
    } while (*(buffer++) & 0x80);

    return result;
}
/*
Counts the run of consecutive set bits that starts at bit position *bit of
*byte, scanning from the high bit towards the low bit and continuing with
bit 7 of the following byte. The unset bit that ends the run is consumed too.
On return *bit is the position of the next unread bit and *length is the
number of times the scan moved on to a following byte.
Returns the number of set bits found.
NOTE(review): no buffer size is passed in, so the scan cannot detect the end
of the buffer and never returns -1; the caller must guarantee that an unset
bit exists inside the buffer.
*/
inline int
ffus (unsigned char* byte, int* bit, size_t *length) {
int bits = 0;
*length = 0;
/* Step over every set bit, high bit first. */
while(*byte & (1 << *bit)){
if(*bit)
--(*bit);
else {
/* Bit 0 was set: continue with bit 7 of the next byte. */
++byte;
++(*length);
*bit = 7;
}
++bits;
}
/* Skip the unset bit that terminated the run. */
if(*bit)
--(*bit);
else {
++(*length);
*bit = 7;
}
return bits;
}
/*
Decodes one variable-length integer from a bit stream.
byte: first byte to read from.
bit: in/out position (7 = high bit, 0 = low bit) of the next unread bit.
s, r: encoding parameters; only s == 2 is accepted (presumably the "scale"
and "root" of the index encoding -- TODO confirm against the format notes).
length: receives the number of bytes the read position advanced.
The value is a prefix of set bits ended by an unset bit (counted by ffus),
followed by r + (count ? count-1 : 0) payload bits read high bit first; when
the prefix is non-empty an extra 1 bit is placed above the payload.
Returns all ones (~0) with *length == 0 when bit is NULL, *bit > 7 or s != 2.
*/
inline uint64_t
sr_int(unsigned char* byte, int* bit,
unsigned char s, unsigned char r, size_t *length)
{
uint64_t ret;
unsigned char mask;
int n, n_bits, num_bits, base, count;
size_t fflen;
*length = 0;
if(!bit || *bit > 7 || s != 2)
return ~(uint64_t)0;
ret = 0;
/* Read the prefix and move past the bytes it consumed. */
count = ffus(byte, bit, &fflen);
*length += fflen;
byte += *length;
/* Number of payload bits that follow the prefix. */
n_bits = n = r + (count ? count-1 : 0) ;
while (n > 0) {
/* num_bits is one less than the number of bits taken from the current byte;
base is the position of the lowest bit taken. */
num_bits = n > *bit ? *bit : n-1;
base = n > *bit ? 0 : *bit - (n-1);
/* mask = the low (num_bits + 1) bits set. */
switch (num_bits){
case 0:
mask = 1;
break;
case 1:
mask = 3;
break;
case 2:
mask = 7;
break;
case 3:
mask = 0xf;
break;
case 4:
mask = 0x1f;
break;
case 5:
mask = 0x3f;
break;
case 6:
mask = 0x7f;
break;
case 7:
mask = 0xff;
break;
default:
mask = 0xff;
break;
}
mask <<= base;
/* Append the extracted bits below the bits gathered so far. */
ret = (ret << (num_bits+1)) |
(uint64_t)((*byte & mask) >> base);
if( n > *bit ){
/* The rest of this byte was used up: continue at bit 7 of the next byte. */
++byte;
++(*length);
n -= *bit+1;
*bit = 7;
} else {
/* All remaining payload bits came from this byte. */
*bit -= n;
n = 0;
}
}
/* A non-empty prefix adds a 1 bit directly above the payload bits. */
if(count)
ret |= (uint64_t)1 << n_bits;
return ret;
}
/*
 * Walk the index nodes of the full-text-search tree and return the offset
 * of the leaf node that may contain `text`.
 *
 * chmfile         archive handle passed through to chm_retrieve_object.
 * text            word being searched for (compared case-insensitively).
 * initial_offset  offset of the root index node inside `ui`.
 * buff_size       size in bytes of one tree node.
 * tree_depth      number of tree levels; tree_depth - 1 index levels are
 *                 descended before the leaf level is reached.
 * ui              unit holding the tree.
 *
 * Returns the leaf node offset, or 0 when it cannot be determined (read
 * failure, allocation failure, malformed node, or no entry >= text).
 *
 * Index node layout as read here: a 16-bit free-space count, then entries of
 *   word_len (1 byte), pos (1 byte), word_len - 1 characters,
 *   child offset (4 bytes), 2 further bytes.
 * `pos` is the number of leading characters kept from the previous word.
 */
static inline uint32_t
get_leaf_node_offset(struct chmFile *chmfile,
                     const char *text,
                     uint32_t initial_offset,
                     uint32_t buff_size,
                     uint16_t tree_depth,
                     struct chmUnitInfo *ui)
{
    unsigned char word_len;
    unsigned char pos;
    uint16_t free_space;
    uint32_t used;
    uint32_t i;
    size_t frag_len;
    size_t word_cur_len = 0;
    char *grown;
    char *word = NULL;
    uint32_t test_offset = 0;
    unsigned char *buffer = (unsigned char *)malloc (buff_size);

    if (NULL == buffer)
        return 0;

    while (--tree_depth) {
        /* No progress since the previous level: give up. */
        if (initial_offset == test_offset)
            goto fail;

        test_offset = initial_offset;
        if (buff_size < sizeof(uint16_t) ||
            chm_retrieve_object (chmfile, ui, buffer,
                                 initial_offset, buff_size) == 0)
            goto fail;

        free_space = get_uint16 (buffer);
        /* A free-space count larger than the node would wrap the unsigned
           subtraction below into a huge scan limit. */
        if (free_space > buff_size)
            goto fail;
        used = buff_size - free_space;

        /* Entries start right after the 16-bit header of EVERY node, so the
           cursor must be reset for each level that is descended. */
        i = sizeof(uint16_t);

        while (i < used) {
            word_len = *(buffer + i);
            /* word_len counts the characters plus one; 0 is malformed and
               would turn (word_len - 1) into a huge copy size.  Also make
               sure the characters and the 4-byte child offset are inside
               the node. */
            if (word_len == 0 ||
                (uint64_t) i + word_len + 1 + sizeof(uint32_t) > buff_size)
                goto fail;
            pos = *(buffer + i + 1);
            frag_len = (size_t) word_len - 1;

            /* The kept prefix cannot be longer than the previous word. */
            if (pos > word_cur_len)
                goto fail;

            grown = (char *) realloc (word, (size_t) pos + frag_len + 1);
            if (NULL == grown)
                goto fail;
            word = grown;
            memcpy (word + pos, buffer + i + 2, frag_len);
            word[pos + frag_len] = 0;
            word_cur_len = strlen (word);

            /* First entry that is not smaller than text: descend there. */
            if (strcasecmp (text, word) <= 0) {
                initial_offset = get_uint32 (buffer + i + word_len + 1);
                break;
            }

            i += word_len + sizeof (unsigned char) + sizeof(uint32_t) +
                sizeof(uint16_t);
        }
    }

    /* The last level did not move to a new node: nothing was found. */
    if (initial_offset == test_offset)
        initial_offset = 0;

    free (word);
    free (buffer);
    return initial_offset;

fail:
    free (word);
    free (buffer);
    return 0;
}
/*
 * Process the word-location-code (WLC) records of one index word and report
 * every referenced topic.
 *
 * For each of the wlc_count records the document index is decoded with
 * sr_int (ds/dr), the topic title is looked up through `topics` and
 * `uistrings`, and its URL through `uitbl` and `urlstr`.  With __PYTHON__
 * defined the pair is stored as dict[title] = url, otherwise it is printed.
 * The location codes that follow (cs/cr give their count, ls/lr their
 * encoding) are decoded only to advance the read position.
 *
 * Returns true when all records were processed, false on allocation or read
 * failure.  Entries added before a failure stay in `dict`.
 */
static inline int
pychm_process_wlc (struct chmFile *chmfile,
                   uint64_t wlc_count, uint64_t wlc_size,
                   uint32_t wlc_offset, unsigned char ds,
                   unsigned char dr, unsigned char cs,
                   unsigned char cr, unsigned char ls,
                   unsigned char lr, struct chmUnitInfo *uimain,
                   struct chmUnitInfo* uitbl,
                   struct chmUnitInfo *uistrings,
                   struct chmUnitInfo* topics,
                   struct chmUnitInfo *urlstr,
                   PyObject *dict)
{
    uint32_t stroff, urloff;
    uint64_t i, j, count;
    size_t length;
    int wlc_bit = 7;
    size_t off = 0;
    uint64_t index = 0;
    int ok = false;
    unsigned char entry[TOPICS_ENTRY_LEN];
    unsigned char combuf[COMMON_BUF_LEN];
    unsigned char *buffer;
    char *url = NULL;
    char *topic = NULL;
#ifdef __PYTHON__
    PyObject *py_url;
#endif

    buffer = (unsigned char *) malloc (wlc_size);
    if (NULL == buffer)
        return false;

    if (chm_retrieve_object (chmfile, uimain, buffer,
                             wlc_offset, wlc_size) == 0)
        goto done;

    for (i = 0; i < wlc_count; ++i) {
        /* Every record starts on a byte boundary. */
        if (wlc_bit != 7) {
            ++off;
            wlc_bit = 7;
        }

        /* The decoded value is added to the running document index. */
        index += sr_int (buffer + off, &wlc_bit, ds, dr, &length);
        off += length;

        if (chm_retrieve_object (chmfile, topics, entry,
                                 index * TOPICS_ENTRY_LEN,
                                 TOPICS_ENTRY_LEN) == 0)
            goto done;

        /* Topic title; a fixed placeholder is used when it cannot be read. */
        stroff = get_uint32 (entry + 4);
        free (topic);
        /* Clear the scratch buffer so that a short read cannot be extended
           by bytes left over from an earlier lookup. */
        memset (combuf, 0, sizeof (combuf));
        if (chm_retrieve_object (chmfile, uistrings, combuf,
                                 stroff, COMMON_BUF_LEN - 1) == 0) {
            topic = strdup ("Untitled in index");
        } else {
            combuf[COMMON_BUF_LEN - 1] = 0;
            topic = strdup ((char *) combuf);
        }

        /* URL table entry -> offset of the URL string. */
        urloff = get_uint32 (entry + 8);
        if (chm_retrieve_object (chmfile, uitbl, combuf,
                                 urloff, 12) == 0)
            goto done;
        urloff = get_uint32 (combuf + 8);

        memset (combuf, 0, sizeof (combuf));
        if (chm_retrieve_object (chmfile, urlstr, combuf,
                                 urloff + 8, COMMON_BUF_LEN - 1) == 0)
            goto done;
        combuf[COMMON_BUF_LEN - 1] = 0;

        free (url);
        url = strdup ((char *) combuf);

        if (url && topic) {
#ifdef __PYTHON__
            /* PyDict_SetItemString does not steal the value reference, so
               the new string must be released here.  If the string cannot
               be created the entry is skipped and the Python error stays
               set for the caller. */
            py_url = PyString_FromString (url);
            if (py_url != NULL) {
                PyDict_SetItemString (dict, topic, py_url);
                Py_DECREF (py_url);
            }
#else
            printf ("%s ==> %s\n", url, topic);
#endif
        }

        /* Skip the location codes of this record. */
        count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
        off += length;

        for (j = 0; j < count; ++j) {
            sr_int (buffer + off, &wlc_bit, ls, lr, &length);
            off += length;
        }
    }

    ok = true;

done:
    free (topic);
    free (url);
    free (buffer);
    return ok;
}
/*
 * Full-text search in the "/$FIftiMain" index of a CHM archive.
 *
 * chmfile      archive handle.
 * text         word (whole_words != 0) or word prefix (whole_words == 0)
 *              to look for; compared case-insensitively.
 * whole_words  non-zero: only an exact word match is reported.
 * titles_only  non-zero: entries whose title flag is 0 are skipped.
 * dict         receives title -> URL pairs (see pychm_process_wlc).
 *
 * Returns -1 when text is NULL.  Returns false (0) when the archive lacks
 * one of the required objects, the index header is unsupported, or a read or
 * allocation fails.  For a whole-word search the result of
 * pychm_process_wlc for the matching word is returned; for a prefix search
 * true is returned once at least one word matched.
 *
 * Leaf node layout as read here: next-leaf offset (4 bytes), 2 bytes,
 * free-space count (2 bytes), then entries of
 *   word_len (1), pos (1), word_len - 1 characters, title flag (1),
 *   WLC count (encint), WLC offset (4), 2 bytes, WLC size (encint).
 */
int
chm_search (struct chmFile *chmfile,
            const char *text, int whole_words,
            int titles_only, PyObject *dict)
{
    const uint32_t leaf_hdr_len =
        sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
    unsigned char header[FTS_HEADER_LEN];
    unsigned char doc_index_s;
    unsigned char doc_index_r;
    unsigned char code_count_s;
    unsigned char code_count_r;
    unsigned char loc_codes_s;
    unsigned char loc_codes_r;
    unsigned char word_len, pos;
    unsigned char *buffer;
    char *word = NULL;
    char *grown;
    size_t word_cur_len = 0;
    size_t frag_len;
    size_t text_len;
    uint32_t node_offset;
    uint32_t node_len;
    uint32_t used;
    uint16_t tree_depth;
    uint32_t i;
    uint16_t free_space;
    uint64_t wlc_count, wlc_size;
    uint32_t wlc_offset;
    unsigned char title;
    size_t encsz;
    struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
    int partial = false;

    if (NULL == text)
        return -1;

    if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
        CHM_RESOLVE_SUCCESS)
        return false;

    if (chm_retrieve_object (chmfile, &ui, header, 0, FTS_HEADER_LEN) == 0)
        return false;

    doc_index_s = header[0x1E];
    doc_index_r = header[0x1F];
    code_count_s = header[0x20];
    code_count_r = header[0x21];
    loc_codes_s = header[0x22];
    loc_codes_r = header[0x23];

    /* sr_int only supports s == 2. */
    if (doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2)
        return false;

    node_offset = get_uint32 (header + 0x14);
    node_len = get_uint32 (header + 0x2e);
    tree_depth = get_uint16 (header + 0x18);

    /* A node must at least hold the leaf header that is read below. */
    if (node_len < leaf_hdr_len)
        return false;

    buffer = (unsigned char *) malloc (node_len);
    if (NULL == buffer)
        return false;

    node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
                                        tree_depth, &ui);
    if (!node_offset) {
        free (buffer);
        return false;
    }

    text_len = strlen (text);

    do {
        if (chm_retrieve_object (chmfile, &ui, buffer,
                                 node_offset, node_len) == 0) {
            partial = false;
            goto done;
        }

        free_space = get_uint16 (buffer + 6);
        /* Offset of the following leaf.  Read once per node so it is also
           up to date when the node holds no entries. */
        node_offset = get_uint32 (buffer);

        /* Guard the unsigned subtraction against a bogus free-space count. */
        if (free_space > node_len)
            goto done;
        used = node_len - free_space;

        i = leaf_hdr_len;

        while (i < used) {
            word_len = *(buffer + i);
            /* word_len == 0 would underflow (word_len - 1); the second test
               keeps the characters and the title flag inside the node. */
            if (word_len == 0 ||
                (uint64_t) i + word_len + 2 > node_len)
                goto done;
            pos = *(buffer + i + 1);
            frag_len = (size_t) word_len - 1;

            /* Rebuild the word: keep `pos` characters of the previous word
               and append the stored fragment. */
            if (pos > word_cur_len)
                goto done;
            grown = (char *) realloc (word, (size_t) pos + frag_len + 1);
            if (NULL == grown)
                goto done;
            word = grown;
            memcpy (word + pos, buffer + i + 2, frag_len);
            word[pos + frag_len] = 0;
            word_cur_len = strlen (word);

            i += 2 + word_len;
            title = *(buffer + i - 1);

            if (i >= node_len)
                goto done;
            wlc_count = be_encint (buffer + i, &encsz);
            i += encsz;

            /* WLC offset (4 bytes), 2 more bytes, then at least one byte of
               the encoded WLC size must still be inside the node. */
            if ((uint64_t) i + sizeof(uint32_t) + sizeof(uint16_t) >= node_len)
                goto done;
            wlc_offset = get_uint32 (buffer + i);
            i += sizeof(uint32_t) + sizeof(uint16_t);
            wlc_size = be_encint (buffer + i, &encsz);
            i += encsz;

            if (!title && titles_only)
                continue;

            if (whole_words && !strcasecmp (text, word)) {
                partial = pychm_process_wlc (chmfile, wlc_count, wlc_size,
                                             wlc_offset, doc_index_s,
                                             doc_index_r, code_count_s,
                                             code_count_r, loc_codes_s,
                                             loc_codes_r, &ui, &uiurltbl,
                                             &uistrings, &uitopics,
                                             &uiurlstr, dict);
                goto done;
            }

            if (!whole_words) {
                if (!strncasecmp (word, text, text_len)) {
                    partial = true;
                    pychm_process_wlc (chmfile, wlc_count, wlc_size,
                                       wlc_offset, doc_index_s,
                                       doc_index_r, code_count_s,
                                       code_count_r, loc_codes_s,
                                       loc_codes_r, &ui, &uiurltbl,
                                       &uistrings, &uitopics,
                                       &uiurlstr, dict);
                } else if (strncasecmp (text, word, text_len) < 0) {
                    /* text sorts before this word and is not a prefix of
                       it, so no later word can match.  Only the sign of the
                       comparison result is meaningful. */
                    break;
                }
            }
        }
        /* A prefix search continues with the next leaf while the last word
           seen still matches; the comparison is case-insensitive like the
           matching above. */
    } while (!whole_words &&
             word != NULL &&
             !strncasecmp (word, text, text_len) &&
             node_offset);

done:
    free (word);
    free (buffer);
    return partial;
}
/* One place where chm_get_lcid looks for a language id: the path of an
   object inside the archive and the byte offset, within that object, of the
   32-bit value. */
typedef struct {
const char *file;
int offset;
} Langrec;
/* Candidate locations, tried in this order by chm_get_lcid.
   NOTE(review): unlike the first entry, the two BTree paths have no leading
   '/'; confirm that chm_resolve_object can resolve them as written. */
Langrec lang_files[] = {
{"/$FIftiMain", 0x7E},
{"$WWKeywordLinks/BTree", 0x34},
{"$WWAssociativeLinks/BTree", 0x34}
};
/* Number of entries in lang_files. */
#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(Langrec))
/*
 * Return the language id (LCID) stored in the archive.
 *
 * The locations listed in lang_files are tried in order; the first one that
 * can be resolved and yields all four bytes wins.  The value is decoded as a
 * little-endian 32-bit integer with get_uint32, like every other multi-byte
 * field in this file, so the result does not depend on host byte order.
 *
 * Returns the LCID, or -1 when none of the locations could be read.
 */
int
chm_get_lcid (struct chmFile *chmfile) {
    struct chmUnitInfo ui;
    unsigned char raw[sizeof(uint32_t)];
    size_t i;

    for (i = 0; i < LANG_FILES_SIZE; i++) {
        if (chm_resolve_object (chmfile, lang_files[i].file, &ui) !=
            CHM_RESOLVE_SUCCESS)
            continue;

        /* Require the complete value: a short read would leave part of the
           buffer uninitialised. */
        if (chm_retrieve_object (chmfile, &ui, raw,
                                 lang_files[i].offset,
                                 sizeof(raw)) == (int) sizeof(raw))
            return (int) get_uint32 (raw);
    }

    return -1;
}
#ifdef __PYTHON__
/*
 * Python: is_searchable(chmfile) -> int
 *
 * Returns 1 when the archive contains every object chm_search needs
 * ("/$FIftiMain", "/#TOPICS", "/#STRINGS", "/#URLTBL", "/#URLSTR"),
 * otherwise 0.  Raises TypeError when the argument is not a wrapped
 * chmFile pointer.
 */
static PyObject *
is_searchable (PyObject *self, PyObject *args) {
    static const char *const required[] = {
        "/$FIftiMain", "/#TOPICS", "/#STRINGS", "/#URLTBL", "/#URLSTR"
    };
    struct chmFile *file;
    PyObject *obj0;
    struct chmUnitInfo ui;
    size_t k;

    if (!PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
        PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
    /* A NULL pointer must not be handed to chmlib. */
    if (NULL == file) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected chmfile (not CHMFile!)");
        return NULL;
    }

    for (k = 0; k < sizeof(required) / sizeof(required[0]); k++) {
        if (chm_resolve_object (file, required[k], &ui) !=
            CHM_RESOLVE_SUCCESS)
            return Py_BuildValue ("i", 0);
    }

    return Py_BuildValue ("i", 1);
}
/*
 * Python: search(chmfile, text, whole_words, titles_only) -> (int, dict)
 *
 * Runs chm_search and returns its result code together with a new dict that
 * maps topic titles to URLs.  Raises TypeError for bad arguments and
 * MemoryError when the dict cannot be created.
 */
static PyObject *
search (PyObject *self, PyObject *args) {
    char *text;
    int whole_words;
    int titles_only;
    int partial;
    struct chmFile *file;
    PyObject *obj0;
    PyObject *dict;
    PyObject *result;

    if (!PyArg_ParseTuple (args, "Osii:search", &obj0, &text,
                           &whole_words, &titles_only)) {
        PyErr_SetString(PyExc_TypeError,
                        "Expected chmfile (not CHMFile!), string, int, int");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
    /* A NULL pointer must not be handed to chmlib. */
    if (NULL == file) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected chmfile (not CHMFile!), string, int, int");
        return NULL;
    }

    dict = PyDict_New();
    if (NULL == dict) {
        PyErr_NoMemory();
        return NULL;
    }

    partial = chm_search (file, text, whole_words, titles_only, dict);

    /* "O" adds its own reference to dict, so the reference obtained from
       PyDict_New must be dropped here or the dict would never be freed. */
    result = Py_BuildValue ("(iO)", partial, dict);
    Py_DECREF (dict);
    return result;
}
/*
 * Python: get_lcid(chmfile) -> int or None
 *
 * Returns the value of chm_get_lcid, or None when that function reports -1.
 * Raises TypeError when the argument is not a wrapped chmFile pointer.
 */
static PyObject *
get_lcid (PyObject *self, PyObject *args) {
    int code;
    struct chmFile *file;
    PyObject *obj0;

    if (!PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
        PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
    /* A NULL pointer must not be handed to chmlib. */
    if (NULL == file) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected a chmfile (not a CHMFile!)");
        return NULL;
    }

    code = chm_get_lcid (file);
    if (code == -1) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    return Py_BuildValue ("i", code);
}
/* Method table of the chm_extra module: Python name, C function, calling
   convention and docstring of each exported function.  The all-NULL entry
   terminates the table. */
static PyMethodDef
IndexMethods[] = {
{"get_lcid", get_lcid, METH_VARARGS,
"Returns LCID (Locale ID) for archive."},
{"search", search, METH_VARARGS,
"Perform Full-Text search."},
{"is_searchable", is_searchable, METH_VARARGS,
"Return 1 if it is possible to search the archive, 0 otherwise."},
{NULL, NULL, 0, NULL}
};
/* Module initialisation function: creates the "chm_extra" module with the
   functions listed in IndexMethods.  Declared extern "C" when compiled as
   C++ and exported through MODEXPORT. */
#ifdef __cplusplus
extern "C"
#endif
MODEXPORT(void)
initchm_extra (void) {
Py_InitModule ("chm_extra", IndexMethods);
}
#else
/*
 * Stand-alone test driver (built when __PYTHON__ is not defined).
 *
 * Usage: <program> <filename>
 * Opens the archive, prints its LCID and then repeatedly reads
 * "<whole_words> <titles_only> <string>" from standard input and runs
 * chm_search on it; matches are printed by pychm_process_wlc.  The loop ends
 * at end of input or at the first line that cannot be parsed.
 *
 * Returns 0 on success or after printing the usage line, -1 when the file
 * cannot be opened.
 */
int
main (int argc, char **argv) {
    struct chmFile *file;
    char text[255];
    int whole_words, titles_only;
    int partial;
    int lcid;

    if (argc != 2) {
        printf ("\n%s <filename>\n", argv[0]);
        return 0;
    }

    file = chm_open (argv[1]);
    if (!file)
        return -1;

    lcid = chm_get_lcid (file);
    printf ("\nLCID= %d (%08X)\n", lcid, (unsigned int) lcid);

    for (;;) {
        printf ("\n<whole_words> <titles_only> <string>\n");
        printf ("> ");
        /* All three fields must be converted: scanf returns EOF (non-zero)
           at end of input, and fewer than 3 on a partial match.  The width
           keeps the word inside text[]. */
        if (scanf ("%d %d %254s", &whole_words, &titles_only, text) != 3)
            break;
        partial = chm_search (file, text, whole_words, titles_only, NULL);
        printf ("Partial = %d\n", partial);
    }

    chm_close (file);
    return 0;
}
#endif

View File

@ -0,0 +1,247 @@
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006-2008 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h>
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
// or the compiler gives many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#ifdef __cplusplus
extern "C" {
#endif
# include <wchar.h>
#ifdef __cplusplus
}
#endif
// Define _W64 macros to mark types changing their size, like intptr_t.
#ifndef _W64
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
# define _W64 __w64
# else
# define _W64
# endif
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
// Visual Studio 6 and Embedded Visual C++ 4 don't
// realize that, e.g., char has the same size as __int8,
// so we give up on __intX for them.
#if (_MSC_VER < 1300)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#else
typedef signed __int8 int8_t;
typedef signed __int16 int16_t;
typedef signed __int32 int32_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
#endif
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
#ifdef _WIN64 // [
typedef signed __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef _W64 signed int intptr_t;
typedef _W64 unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Macros for integer constants
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// 7.18.4.1 Macros for minimum-width integer constants
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
#define INTMAX_C INT64_C
#define UINTMAX_C UINT64_C
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,214 @@
%module chmlib
%include "typemaps.i"
%include "cstring.i"
%{
/*
Copyright (C) 2003 Rubens Ramos <rubensr@users.sourceforge.net>
Based on code by:
Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
pychm is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public
License along with this program; see the file COPYING. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA
$Id: swig_chm.i,v 1.1.1.1 2003/12/02 12:38:14 rubensr Exp $
*/
#include "chm_lib.h"
#include <stdio.h>
static PyObject *my_callback = NULL;
/* Remember `arg` as the Python callable that dummy_enumerator will invoke.
   Returns None, or NULL with TypeError set when `arg` is not callable. */
static PyObject *
my_set_callback(PyObject *dummy, PyObject *arg)
{
    if (PyCallable_Check(arg) == 0) {
        PyErr_SetString(PyExc_TypeError, "parameter must be callable");
        return NULL;
    }

    /* Take the new reference before releasing the old one. */
    Py_XINCREF(arg);
    Py_XDECREF(my_callback);
    my_callback = arg;

    Py_INCREF(Py_None);
    return Py_None;
}
/*
 * C-side enumerator handed to chm_enumerate / chm_enumerate_dir.  It wraps
 * its arguments as Python objects and calls the callable stored in
 * my_callback with (chmFile, chmUnitInfo, context).
 *
 * context is the PyCObject created by the `void *context` typemap; the
 * Python object it wraps is passed on to the callback.
 *
 * Returns 1 when the callback returned normally and 0 when the arguments
 * could not be built or the callback raised.
 */
int dummy_enumerator (struct chmFile *h,
                      struct chmUnitInfo *ui,
                      void *context) {
    PyObject *arglist;
    PyObject *result;
    PyObject *py_h;
    PyObject *py_ui;
    PyObject *py_c;
    int status = 0;

    py_h = SWIG_NewPointerObj((void *) h, SWIGTYPE_p_chmFile, 0);
    py_ui = SWIG_NewPointerObj((void *) ui, SWIGTYPE_p_chmUnitInfo, 0);
    /* Not a new reference: this is the pointer stored inside the CObject. */
    py_c = PyCObject_AsVoidPtr(context);

    /* Time to call the callback */
    arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
    if (arglist) {
        result = PyEval_CallObject(my_callback, arglist);
        Py_DECREF(arglist);
        /* result is NULL when the callback raised; it must be tested
           before it is released. */
        if (result != NULL) {
            Py_DECREF(result);
            status = 1;
        }
    }

    /* Release the wrappers on every path, including a failed
       Py_BuildValue. */
    Py_XDECREF(py_h);
    Py_XDECREF(py_ui);

    return status; /* 0 passes the error back */
}
%}
/* CHM_ENUMERATOR argument: store the Python callable with my_set_callback
   and pass dummy_enumerator to the C function instead. */
%typemap(in) CHM_ENUMERATOR {
if (!my_set_callback(self, $input)) goto fail;
$1 = dummy_enumerator;
}
/* context argument: wrap the Python object in a PyCObject; dummy_enumerator
   unwraps it again before calling the callback. */
%typemap(in) void *context {
if (!($1 = PyCObject_FromVoidPtr($input, NULL))) goto fail;
}
/* OutValue argument: takes no Python input; a zero-initialised chmUnitInfo
   is allocated for the C function to fill in. */
%typemap(in, numinputs=0) struct chmUnitInfo *OutValue (struct chmUnitInfo *temp = (struct chmUnitInfo *) calloc(1, sizeof(struct chmUnitInfo))) {
$1 = temp;
}
/* OutValue result: wrap the structure in a pointer object created with
   own flag 1 and append it to the result, turning the result into a tuple
   when it is not one already. */
%typemap(argout) struct chmUnitInfo *OutValue {
PyObject *o, *o2, *o3;
o = SWIG_NewPointerObj((void *) $1, SWIGTYPE_p_chmUnitInfo, 1);
if ((!$result) || ($result == Py_None)) {
$result = o;
} else {
if (!PyTuple_Check($result)) {
PyObject *o2 = $result;
$result = PyTuple_New(1);
PyTuple_SetItem($result,0,o2);
}
o3 = PyTuple_New(1);
PyTuple_SetItem(o3,0,o);
o2 = $result;
$result = PySequence_Concat(o2,o3);
Py_DECREF(o2);
Py_DECREF(o3);
}
}
/* OUTPUT buffer: allocate arg5 bytes for the C function to write into.
   NOTE(review): arg5 is a local of the generated wrapper, presumably the
   `len` argument of chm_retrieve_object -- confirm in the generated code. */
%typemap(check) unsigned char *OUTPUT {
/* nasty hack */
#ifdef __cplusplus
$1 = ($1_ltype) new char[arg5];
#else
$1 = ($1_ltype) malloc(arg5);
#endif
if ($1 == NULL) SWIG_fail;
}
/* OUTPUT result: copy arg5 bytes of the buffer into a Python string, append
   it to the result and release the buffer.
   NOTE(review): the string length is arg5, not the function's return value;
   verify the behaviour for short reads. */
%typemap(argout,fragment="t_output_helper") unsigned char *OUTPUT {
PyObject *o;
o = PyString_FromStringAndSize($1, arg5);
$result = t_output_helper($result,o);
#ifdef __cplusplus
delete [] $1;
#else
free($1);
#endif
}
#ifdef WIN32
typedef unsigned __int64 LONGUINT64;
typedef __int64 LONGINT64;
#else
typedef unsigned long long LONGUINT64;
typedef long long LONGINT64;
#endif
/* the two available spaces in a CHM file */
/* N.B.: The format supports arbitrarily many spaces, but only */
/* two appear to be used at present. */
#define CHM_UNCOMPRESSED (0)
#define CHM_COMPRESSED (1)
/* structure representing an ITS (CHM) file stream */
struct chmFile;
/* structure representing an element from an ITS file stream */
#define CHM_MAX_PATHLEN 256
struct chmUnitInfo
{
LONGUINT64 start;
LONGUINT64 length;
int space;
char path[CHM_MAX_PATHLEN+1];
};
/* open an ITS archive */
struct chmFile* chm_open(const char *filename);
/* close an ITS archive */
void chm_close(struct chmFile *h);
/* methods for setting tuning parameters for a particular file */
#define CHM_PARAM_MAX_BLOCKS_CACHED 0
void chm_set_param(struct chmFile *h,
int paramType,
int paramVal);
/* resolve a particular object from the archive */
#define CHM_RESOLVE_SUCCESS (0)
#define CHM_RESOLVE_FAILURE (1)
int chm_resolve_object(struct chmFile *h,
const char *objPath,
struct chmUnitInfo *OutValue);
/* retrieve part of an object from the archive */
LONGINT64 chm_retrieve_object(struct chmFile *h,
struct chmUnitInfo *ui,
unsigned char *OUTPUT,
LONGUINT64 addr,
LONGINT64 len);
/* enumerate the objects in the .chm archive */
typedef int (*CHM_ENUMERATOR)(struct chmFile *h,
struct chmUnitInfo *ui,
void *context);
#define CHM_ENUMERATE_NORMAL (1)
#define CHM_ENUMERATE_META (2)
#define CHM_ENUMERATE_SPECIAL (4)
#define CHM_ENUMERATE_FILES (8)
#define CHM_ENUMERATE_DIRS (16)
#define CHM_ENUMERATE_ALL (31)
#define CHM_ENUMERATOR_FAILURE (0)
#define CHM_ENUMERATOR_CONTINUE (1)
#define CHM_ENUMERATOR_SUCCESS (2)
int chm_enumerate(struct chmFile *h,
int what,
CHM_ENUMERATOR e,
void *context);
int chm_enumerate_dir(struct chmFile *h,
const char *prefix,
int what,
CHM_ENUMERATOR e,
void *context);

View File

@ -4,6 +4,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time, bz2
from calibre.constants import isfreebsd
from calibre.constants import __version__, __appname__, __author__
@ -57,6 +58,9 @@ def create_man_page(prog, parser):
lines = [x if isinstance(x, unicode) else unicode(x, 'utf-8', 'replace') for
x in lines]
return bz2.compress((u'\n'.join(lines)).encode('utf-8'))
if not isfreebsd:
return bz2.compress((u'\n'.join(lines)).encode('utf-8'))
else:
return (u'\n'.join(lines)).encode('utf-8')