KG updates

2025-07-09 03:04:10 -04:00 · 2010-03-02 16:26:25 -08:00 · 2010-03-02 16:26:25 -08:00 · 7ad7c34eee
commit 7ad7c34eee
parent 8f32765dac e68833a164
54 changed files with 3929 additions and 490 deletions
--- a/resources/images/news/diariovasco.png
+++ b/resources/images/news/diariovasco.png
--- a/resources/recipes/atlantic.recipe
+++ b/resources/recipes/atlantic.recipe
@ -5,76 +5,103 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 theatlantic.com
 '''
-import re
+import string
+
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString

 class TheAtlantic(BasicNewsRecipe):

    title      = 'The Atlantic'
    __author__ = 'Kovid Goyal and Sujata Raman'
    description = 'Current affairs and politics focussed on the US'
-    INDEX = 'http://www.theatlantic.com/doc/current'
+    INDEX = 'http://www.theatlantic.com/magazine/toc/0/'
    language = 'en'

-    remove_tags_before = dict(name='div', id='storytop')
-    remove_tags        = [
-                        dict(name='div', id=['seealso','storybottom',  'footer', 'ad_banner_top', 'sidebar','articletoolstop','subcontent',]),
-                        dict(name='p', attrs={'id':["pagination"]}),
-                        dict(name='table',attrs={'class':"tools"}),
-                        dict(name='style'),
-                        dict(name='a', href='/a/newsletters.mhtml')
-                         ]
-    remove_attributes = ['icap', 'callout', 'style']
-    no_stylesheets     = True
-    conversion_options = { 'linearize_tables':True }
+    remove_tags_before = dict(name='div', id='articleHead')
+    remove_tags_after  = dict(id='copyright')
+    remove_tags        = [dict(id=['header', 'printAds', 'pageControls'])]
+    no_stylesheets = True
+
+
+    def print_version(self, url):
+        return url.replace('/archive/', '/print/')

-    extra_css = '''
-                    #timestamp{font-family:Arial,Helvetica,sans-serif; color:#666666 ;font-size:x-small}
-                    #storytype{font-family:Arial,Helvetica,sans-serif; color:#D52B1E ;font-weight:bold; font-size:x-small}
-                    h2{font-family:georgia,serif; font-style:italic;font-size:x-small;font-weight:normal;}
-                    h1{font-family:georgia,serif; font-weight:bold; font-size:large}
-                    #byline{font-family:georgia,serif; font-weight:bold; font-size:x-small}
-                    #topgraf{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
-                    .artsans{{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
-                '''
    def parse_index(self):
        articles = []

        soup = self.index_to_soup(self.INDEX)
+        sectit = soup.find('h1', attrs={'class':'sectionTitle'})
+        if sectit is not None:
+            texts = sectit.findAll('cufontext')
+            texts = map(self.tag_to_string, texts[-2:])
+            self.timefmt = ' [%s]'%(''.join(texts))

-        issue = soup.find('span', attrs={'class':'issue'})
-        if issue:
-            self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')
-
-        cover = soup.find('img', alt=re.compile('Cover'), src=True)
+        cover = soup.find('img', src=True, attrs={'class':'cover'})
        if cover is not None:
-            self.cover_url = 'http://theatlantic.com'+cover['src']
+            self.cover_url = cover['src']

-        for item in soup.findAll('div', attrs={'class':'item'}):
-            a = item.find('a')
-            if a and a.has_key('href'):
+        feeds = []
+        for section in soup.findAll('div', attrs={'class':'magazineSection'}):
+            section_title = section.find(attrs={'class':'sectionHeader'})
+            section_title = string.capwords(self.tag_to_string(section_title))
+            self.log('Found section:', section_title)
+            articles = []
+            for post in section.findAll('div', attrs={'class':'post'}):
+                h = post.find(['h3', 'h4'])
+                title = self.tag_to_string(h)
+                a = post.find('a', href=True)
                url = a['href']
-                if not url.startswith('http://'):
-                    url = 'http://www.theatlantic.com/'+url
-                url = url.replace('/doc/', '/doc/print/')
-                title = self.tag_to_string(a)
-                if title in ('VIDEO', 'AUDIO', 'INTERACTIVE MAP', 'SIDEBAR', 'RECIPES'):
-                    continue
-                title = title.replace('&AMP;', '&')
-                byline = item.find(attrs={'class':'byline'})
-                date = self.tag_to_string(byline) if byline else ''
-                description = ''
+                if url.startswith('/'):
+                    url = 'http://www.theatlantic.com'+url
+                p = post.find('p', attrs={'class':'dek'})
+                desc = None
+                self.log('\tFound article:', title, 'at', url)
+                if p is not None:
+                    desc = self.tag_to_string(p)
+                    self.log('\t\t', desc)
+                articles.append({'title':title, 'url':url, 'description':desc,
+                    'date':''})
+            feeds.append((section_title, articles))

-                self.log('\tFound article:', title)
-                self.log('\t\t', url)
+        poems = []
+        self.log('Found section: Poems')
+        for poem in soup.findAll('div', attrs={'class':'poem'}):
+            title = self.tag_to_string(poem.find('h4'))
+            desc  = self.tag_to_string(poem.find(attrs={'class':'author'}))
+            url   = 'http://www.theatlantic.com'+poem.find('a')['href']
+            self.log('\tFound article:', title, 'at', url)
+            self.log('\t\t', desc)
+            poems.append({'title':title, 'url':url, 'description':desc,
+                    'date':''})
+        if poems:
+            feeds.append(('Poems', poems))

-                articles.append({
-                                 'title':title,
-                                 'date':date,
-                                 'url':url,
-                                 'description':description
-                            })
+        self.log('Found section: Advice')
+        div = soup.find(id='advice')
+        title = self.tag_to_string(div.find('h4'))
+        url = 'http://www.theatlantic.com'+div.find('a')['href']
+        desc = self.tag_to_string(div.find('p'))
+        self.log('\tFound article:', title, 'at', url)
+        self.log('\t\t', desc)

+        feeds.append(('Advice', [{'title':title, 'url':url, 'description':desc,
+                    'date':''}]))
+        return feeds

+    def postprocess_html(self, soup, first):
+        for table in soup.findAll('table', align='right'):
+            img = table.find('img')
+            if img is not None:
+                img.extract()
+                caption = self.tag_to_string(table).strip()
+                div = Tag(soup, 'div')
+                div['style'] = 'text-align:center'
+                div.insert(0, img)
+                div.insert(1, Tag(soup, 'br'))
+                if caption:
+                    div.insert(2, NavigableString(caption))
+                table.replaceWith(div)
+
+        return soup

-        return [('Current Issue', articles)]
--- a/resources/recipes/diariovasco.recipe
+++ b/resources/recipes/diariovasco.recipe
@ -0,0 +1,50 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.diariovasco.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DiarioVasco(BasicNewsRecipe):
+    title                 = 'Diario Vasco'
+    __author__            = 'Darko Miletic'
+    description           = 'Noticias de pais Vasco y el resto del mundo'
+    publisher             = 'Diario Vasco'
+    category              = 'news, politics, Spain'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'es'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://www.diariovasco.com/img/rd.logotipo2_dvasco.gif'
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    keep_only_tags = [
+                         dict(attrs={'id':'title'})
+                        ,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
+                     ]
+    remove_tags = [dict(name='ul')]
+    remove_attributes = ['width','height']
+
+
+    feeds = [
+              (u'Ultimas Noticias' , u'http://www.diariovasco.com/rss/feeds/ultima.xml'       )
+             ,(u'Portada'          , u'http://www.diariovasco.com/portada.xml'                )
+             ,(u'Politica'         , u'http://www.diariovasco.com/rss/feeds/politica.xml'     )
+             ,(u'Deportes'         , u'http://www.diariovasco.com/rss/feeds/deportes.xml'     )
+             ,(u'Economia'         , u'http://www.diariovasco.com/rss/feeds/economia.xml'     )
+             ,(u'Mundo'            , u'http://www.diariovasco.com/rss/feeds/mundo.xml'        )
+             ,(u'Cultura'          , u'http://www.diariovasco.com/rss/feeds/cultura.xml'      )
+             ,(u'Gente'            , u'http://www.diariovasco.com/rss/feeds/gente.xml'        )
+             ,(u'Contraportada'    , u'http://www.diariovasco.com/rss/feeds/contraportada.xml')
+            ]
--- a/resources/recipes/economist.recipe
+++ b/resources/recipes/economist.recipe
@ -119,6 +119,8 @@ class Economist(BasicNewsRecipe):
            ns = NavigableString(self.tag_to_string(caption))
            div.insert(0, ns)
            div.insert(1, Tag(soup, 'br'))
+            del img['width']
+            del img['height']
            img.extract()
            div.insert(2, img)
            table.replaceWith(div)
--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@ -123,6 +123,8 @@ class Economist(BasicNewsRecipe):
            div.insert(0, ns)
            div.insert(1, Tag(soup, 'br'))
            img.extract()
+            del img['width']
+            del img['height']
            div.insert(2, img)
            table.replaceWith(div)
        return soup
--- a/resources/recipes/epicurious.recipe
+++ b/resources/recipes/epicurious.recipe
@ -0,0 +1,58 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Starson17'
+'''
+www.epicurious.com
+'''
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Epicurious(BasicNewsRecipe):
+    title          = u'Epicurious'
+    __author__  = 'Starson17'
+    description = 'Food and Recipes from Epicurious'
+    cover_url     = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
+    publisher      = 'Epicurious'
+    tags           = 'news, food, gourmet, recipes'          
+    language = 'en'
+    use_embedded_content    = False
+    no_stylesheets        = True
+    remove_javascript = True
+    recursions = 3
+    oldest_article        = 14
+    max_articles_per_feed = 20
+
+    keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
+                      dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
+                           ]
+
+    remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
+                   {'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
+                   dict(name='div', attrs={'class':['tagged','comments']})
+                   ]
+
+    remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]
+
+    feeds = [
+             (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
+             (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
+             (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
+             (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')   
+             ]
+    
+    match_regexps = [
+                     r'http://www.epicurious.com/.*recipes/.*/views'
+                     ]
+
+    preprocess_regexps = [
+        (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
+        (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
+        (re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
+        ]
+
+    def postprocess_html(self, soup, first_fetch):
+        for t in soup.findAll(['table', 'tr', 'td']):
+            t.name = 'div'
+        return soup
+        
--- a/resources/recipes/huffingtonpost.recipe
+++ b/resources/recipes/huffingtonpost.recipe
@ -3,7 +3,7 @@ import re

 class HuffingtonPostRecipe(BasicNewsRecipe):
    __license__  = 'GPL v3'
-    __author__ = 'kwetal'
+    __author__ = 'kwetal and Archana Raman'
    language = 'en'
    version = 2

@ -14,70 +14,89 @@ class HuffingtonPostRecipe(BasicNewsRecipe):

    oldest_article = 1.1
    max_articles_per_feed = 100
-    use_embedded_content = True
+    #use_embedded_content = True

    encoding = 'utf-8'
    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True

    # Feeds from: http://www.huffingtonpost.com/syndication/
    feeds = []
    feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))

-    #feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
-    feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
-    feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
+    feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
+    #feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
+    #feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))

-    #feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
-    feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
-    feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
+    feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
+    #feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
+    #feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))

-    #feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
-    feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
-    feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
+    feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
+    #feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
+    #feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))

-    #feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
-    feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
-    feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
+    feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
+    #feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
+    #feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))

-    #feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
-    feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
-    feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
+    feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
+    #feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
+    #feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))

-    #feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
-    feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
-    feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
+    feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
+    #feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
+    #feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))

-    #feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
-    feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
-    feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
+    feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
+    #feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
+    #feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))

-    #feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
-    feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
-    feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
+    feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
+    #feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
+    #feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))

-    #feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
-    feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
-    feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
+    feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
+    #feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
+    #feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))

-    #feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
-    feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
-    feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
+    feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
+    #feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
+    #feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))

    feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
-    feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
+    #feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
+

    remove_tags = []
    remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
    remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
+    remove_tags.append(dict(name='a', attrs={'class' : 'home_pixie'}))
+    remove_tags.append(dict(name='div', attrs={'id' : ["top_nav",'threeup_top_wrapper','breaking_news_container',"hp_social_network"]}))
+    remove_tags.append(dict(name='img', alt="Connect"))
+    remove_tags.append(dict(name='div', attrs={'class' : ['logo']}))    #'share_boxes_box_block_b_wraper',
+    remove_tags.append(dict(name='div', attrs={'class' :[ 'read_more with_verticals','chicklets_box_outter_v05','blogger_menu_content','chicklets_bar']}))
+    remove_tags.append(dict(name='div', attrs={'class' : ['sidebar_blog_first_design','sidebar_blog_second_design',]}))
+    remove_tags.append(dict(name='div', attrs={'class' : ['main_big_news_ontop','login-menu','sidebar_blog_third_design','read_more']}))
+
+
+    remove_tags_after = [dict(name='div', attrs={'class' : 'entry_content'}) ]
+   # remove_attributes = ['style']

-    remove_attributes = ['style']

    extra_css = '''
+                    h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
+                    h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
+                    h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
                    body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
-                    h2{font-size: x-large; font-weight: bold; padding: 0em; margin-bottom: 0.2em;}
-                    a[href]{color: blue; text-decoration: none; cursor: pointer;}
+                    #title_permalink{color:black;font-size:large;}
+                    .date{color:#858585;font-family:"Times New Roman",sans-serif;}
+                    .comments_datetime v05{color:#696969;}
+                    .teaser_permalink{font-style:italic;font-size:xx-small;}
+                    .blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
                    '''
-
+#a[href]{color: blue; text-decoration: none; cursor: pointer;}
    def get_article_url(self, article):
        """
            Workaround for Feedparser behaviour. If an item has more than one <link/> element, article.link is empty and
@ -85,10 +104,21 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
            Todo: refactor to searching this list to avoid the hardcoded zero-index
        """
        link = article.get('link')
+        print("Link:"+link)
        if not link:
            links = article.get('links')
            if links:
                link = links[0]['href']
+                if not links[0]['href']:
+                    link = links[1]['href']

        return link

+    def postprocess_html(self, soup, first_fetch):
+        for tag in soup.findAll('div',text = "What's Your Reaction?"):
+                tag.extract()
+
+        for tg in soup.findAll('blockquote'):
+            tg.extract()
+
+        return soup
--- a/resources/recipes/kathemerini.recipe
+++ b/resources/recipes/kathemerini.recipe
--- a/resources/recipes/kukuburi.recipe
+++ b/resources/recipes/kukuburi.recipe
@ -0,0 +1,37 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'Mori'
+__version__ = 'v. 0.1'
+'''
+Kukuburi.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class KukuburiRecipe(BasicNewsRecipe):
+    __author__ = 'Mori'
+    language = 'en'
+
+    title = u'Kukuburi'
+    publisher = u'Ramón Pérez'
+    description =u'KUKUBURI by Ram\xc3\xb3n P\xc3\xa9rez'
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    oldest_article = 100
+    max_articles_per_feed = 100
+
+    feeds = [
+        (u'Kukuburi', u'http://feeds2.feedburner.com/Kukuburi')
+    ]
+
+    preprocess_regexps = [
+        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+        [
+            (r'<!--.*?-->', lambda match: ''),
+            (r'<div class="feedflare".*?</div>', lambda match: '')
+        ]
+    ]
--- a/resources/recipes/la_segunda.recipe
+++ b/resources/recipes/la_segunda.recipe
@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 lasegunda.com
 '''
@ -19,43 +17,38 @@ class LaSegunda(BasicNewsRecipe):
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
-    cover_url             = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
-    remove_javascript     = True
-    language = 'es'
+    masthead_url          = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
+    remove_empty_feeds    = True
+    language              = 'es'
+    extra_css             = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '            
    
-    
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "' 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+						, 'linearize_tables' : True
+                        }
                        
-    keep_only_tags = [dict(name='table')]
+    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
+    remove_tags        = [dict(name='img')]
+    remove_attributes  = ['width','height']
+	
                        
    feeds = [ 
               (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
-              ,(u'Politica', u'http://www.lasegunda.com/rss20/index.asp?canal=21')
-              ,(u'Cronica', u'http://www.lasegunda.com/rss20/index.asp?canal=20')
-              ,(u'Internacional', u'http://www.lasegunda.com/rss20/index.asp?canal=23')
-              ,(u'Deportes', u'http://www.lasegunda.com/rss20/index.asp?canal=24')
-              ,(u'Epectaculos/Cultura', u'http://www.lasegunda.com/rss20/index.asp?canal=25')
-              ,(u'Educacion', u'http://www.lasegunda.com/rss20/index.asp?canal=26')
-              ,(u'Ciencia y Tecnologia', u'http://www.lasegunda.com/rss20/index.asp?canal=27')
-              ,(u'Solidaridad', u'http://www.lasegunda.com/rss20/index.asp?canal=28')
-              ,(u'Buena Vida', u'http://www.lasegunda.com/rss20/index.asp?canal=32')
+              ,(u'Politica'               , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
+              ,(u'Cronica'                , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
+              ,(u'Internacional'          , u'http://www.lasegunda.com/rss20/index.asp?canal=23')
+              ,(u'Deportes'               , u'http://www.lasegunda.com/rss20/index.asp?canal=24')
+              ,(u'Epectaculos/Cultura'    , u'http://www.lasegunda.com/rss20/index.asp?canal=25')
+              ,(u'Educacion'              , u'http://www.lasegunda.com/rss20/index.asp?canal=26')
+              ,(u'Ciencia y Tecnologia'   , u'http://www.lasegunda.com/rss20/index.asp?canal=27')
+              ,(u'Solidaridad'            , u'http://www.lasegunda.com/rss20/index.asp?canal=28')
+              ,(u'Buena Vida'             , u'http://www.lasegunda.com/rss20/index.asp?canal=32')
            ]

    def print_version(self, url):
        rest, sep, article_id = url.partition('index.asp?idnoticia=')        
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
-
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
-        soup.head.insert(0,mtag)
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
    
--- a/resources/recipes/la_tercera.recipe
+++ b/resources/recipes/la_tercera.recipe
@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 latercera.com
 '''
@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1252'
-    remove_javascript     = True
    use_embedded_content  = False
-
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    remove_empty_feeds    = True
+    language              = 'es'
+ 
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+						, 'linearize_tables' : True
+                        }

    keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]

    remove_tags = [
-                     dict(name='script')
-                    ,dict(name='ul')
+                     dict(name=['ul','input','base'])
                    ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
                    ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
-                    ,dict(name='input')
                    ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
                  ]


    feeds = [
               (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
-              ,(u'Pais', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=654')
+              ,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
+              ,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')              
              ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
              ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
              ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
            ]

    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
-        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup
-
-    language = 'es'
--- a/resources/recipes/tanea.recipe
+++ b/resources/recipes/tanea.recipe
--- a/setup/init.py
+++ b/setup/init.py
@ -11,7 +11,8 @@ import sys, re, os, platform
 is64bit = platform.architecture()[0] == '64bit'
 iswindows = re.search('win(32|64)', sys.platform)
 isosx = 'darwin' in sys.platform
-islinux = not isosx and not iswindows
+isfreebsd = 'freebsd' in sys.platform
+islinux = not isosx and not iswindows and not isfreebsd
 SRC = os.path.abspath('src')
 sys.path.insert(0, SRC)
 sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
@ -117,7 +118,7 @@ class Command(object):
        self.real_user = os.environ.get('SUDO_USER', None)

    def drop_privileges(self):
-        if not islinux or isosx:
+        if not islinux or isosx or isfreebsd:
            return
        if self.real_user is not None:
            self.info('Dropping privileges to those of', self.real_user+':',
@ -128,7 +129,7 @@ class Command(object):
            os.seteuid(int(self.real_uid))

    def regain_privileges(self):
-        if not islinux or isosx:
+        if not islinux or isosx or isfreebsd:
            return
        if os.geteuid() != 0 and self.orig_euid == 0:
            self.info('Trying to get root privileges')
--- a/setup/build_environment.py
+++ b/setup/build_environment.py
@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
 fc_lib = '/usr/lib'
 podofo_inc = '/usr/include/podofo'
 podofo_lib = '/usr/lib'
+chmlib_inc_dirs = chmlib_lib_dirs = []

 if iswindows:
    prefix  = r'C:\cygwin\home\kovid\sw'
@ -96,6 +97,10 @@ if iswindows:
    sw_lib_dir  = os.path.join(prefix, 'lib')
    fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
    fc_lib = sw_lib_dir
+    chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
+        'build', 'chmlib-0.40', 'src'))
+    chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
+        'build', 'chmlib-0.40', 'src', 'Release'))
    png_inc_dirs = [sw_inc_dir]
    png_lib_dirs = [sw_lib_dir]
    png_libs = ['png12']
--- a/setup/extensions.py
+++ b/setup/extensions.py
@ -11,15 +11,16 @@ from distutils import sysconfig

 from PyQt4.pyqtconfig import QtGuiModuleMakefile

-from setup import Command, islinux, isosx, SRC, iswindows
-from setup.build_environment import fc_inc, fc_lib, \
+from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
+from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
        fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
        podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
        QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
        magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
-        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
+        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
+        jpg_lib_dirs, chmlib_lib_dirs
 MT
-isunix = islinux or isosx
+isunix = islinux or isosx or isfreebsd

 make = 'make' if isunix else NMAKE

@ -56,6 +57,22 @@ if iswindows:
    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']

 extensions = [
+
+    Extension('chmlib',
+            ['calibre/utils/chm/swig_chm.c'],
+            libraries=['ChmLib' if iswindows else 'chm'],
+            inc_dirs=chmlib_inc_dirs,
+            lib_dirs=chmlib_lib_dirs,
+            cflags=["-DSWIG_COBJECT_TYPES"]),
+
+    Extension('chm_extra',
+            ['calibre/utils/chm/extra.c'],
+            libraries=['ChmLib' if iswindows else 'chm'],
+            inc_dirs=chmlib_inc_dirs,
+            lib_dirs=chmlib_lib_dirs,
+            cflags=["-D__PYTHON__"]),
+
+
    Extension('pdfreflow',
                reflow_sources,
                headers=reflow_headers,
@ -154,6 +171,13 @@ if islinux:
    ldflags.append('-lpython'+sysconfig.get_python_version())


+if isfreebsd:
+    cflags.append('-pthread')
+    ldflags.append('-shared')
+    cflags.append('-I'+sysconfig.get_python_inc())
+    ldflags.append('-lpython'+sysconfig.get_python_version())
+
+
 if isosx:
    x, p = ('i386', 'ppc')
    archs = ['-arch', x, '-arch', p, '-isysroot',
--- a/setup/install.py
+++ b/setup/install.py
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

 import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex

-from setup import Command, islinux, basenames, modules, functions, \
+from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
        __appname__, __version__

 HEADER = '''\
@ -116,7 +116,7 @@ class Develop(Command):


    def pre_sub_commands(self, opts):
-        if not islinux:
+        if not (islinux or isfreebsd):
            self.info('\nSetting up a source based development environment is only '
                    'supported on linux. On other platforms, see the User Manual'
                    ' for help with setting up a development environment.')
@ -156,7 +156,7 @@ class Develop(Command):
            self.warn('Failed to compile mount helper. Auto mounting of',
                ' devices will not work')

-        if os.geteuid() != 0:
+        if not isfreebsd and os.geteuid() != 0:
            return self.warn('Must be run as root to compile mount helper. Auto '
                    'mounting of devices will not work.')
        src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
@ -168,9 +168,10 @@ class Develop(Command):
        ret = p.wait()
        if ret != 0:
            return warn()
-        os.chown(dest, 0, 0)
-        os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
-                stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
+        if not isfreebsd:
+            os.chown(dest, 0, 0)
+            os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
+                    stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
        self.manifest.append(dest)
        return dest

--- a/setup/installer/linux/freeze.py
+++ b/setup/installer/linux/freeze.py
@ -42,6 +42,7 @@ class LinuxFreeze(Command):
                        '/usr/lib/liblcms.so.1',
                        '/tmp/calibre-mount-helper',
                        '/usr/lib/libunrar.so',
+                        '/usr/lib/libchm.so.0',
                        '/usr/lib/libsqlite3.so.0',
                        '/usr/lib/libsqlite3.so.0',
                        '/usr/lib/libmng.so.1',
--- a/setup/installer/osx/app/main.py
+++ b/setup/installer/osx/app/main.py
@ -459,7 +459,7 @@ class Py2App(object):

    @flush
    def add_misc_libraries(self):
-        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
+        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
            info('\nAdding', x)
            x = 'lib%s.dylib'%x
            shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -12,7 +12,7 @@ warnings.simplefilter('ignore', DeprecationWarning)


 from calibre.startup import plugins, winutil, winutilerror
-from calibre.constants import iswindows, isosx, islinux, isfrozen, \
+from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
                              terminal_controller, preferred_encoding, \
                              __appname__, __version__, __author__, \
                              win32event, win32api, winerror, fcntl, \
@ -22,7 +22,7 @@ import mechanize
 if False:
    winutil, winutilerror, __appname__, islinux, __version__
    fcntl, win32event, isfrozen, __author__, terminal_controller
-    winerror, win32api
+    winerror, win32api, isfreebsd

 mimetypes.add_type('application/epub+zip',                '.epub')
 mimetypes.add_type('text/x-sony-bbeb+xml',                '.lrs')
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -22,7 +22,8 @@ terminal_controller = TerminalController(sys.stdout)
 iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
 isosx     = 'darwin' in sys.platform.lower()
 isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
-islinux   = not(iswindows or isosx)
+isfreebsd = 'freebsd' in sys.platform.lower()
+islinux   = not(iswindows or isosx or isfreebsd)
 isfrozen  = hasattr(sys, 'frozen')
 isunix = isosx or islinux

@ -56,7 +57,8 @@ if plugins is None:
        sys.path.insert(0, plugin_path)

        for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
-            'fontconfig', 'pdfreflow', 'progress_indicator'] + \
+            'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
+            'chm_extra'] + \
                    (['winutil'] if iswindows else []) + \
                    (['usbobserver'] if isosx else []):
            try:
--- a/src/calibre/customize/init.py
+++ b/src/calibre/customize/init.py
@ -119,11 +119,34 @@ class Plugin(object):

    def __enter__(self, *args):
        if self.plugin_path is not None:
-            sys.path.insert(0, self.plugin_path)
+            from calibre.utils.zipfile import ZipFile
+            zf = ZipFile(self.plugin_path)
+            extensions = set([x.rpartition('.')[-1].lower() for x in
+                zf.namelist()])
+            zip_safe = True
+            for ext in ('pyd', 'so', 'dll', 'dylib'):
+                if ext in extensions:
+                    zip_safe = False
+            if zip_safe:
+                sys.path.insert(0, self.plugin_path)
+                self._sys_insertion_path = self.plugin_path
+            else:
+                from calibre.ptempfile import TemporaryDirectory
+                self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
+                self._sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
+                zf.extractall(self._sys_insertion_path)
+                sys.path.insert(0, self._sys_insertion_path)
+            zf.close()
+

    def __exit__(self, *args):
-        if self.plugin_path in sys.path:
-            sys.path.remove(self.plugin_path)
+        ip, it = getattr(self, '_sys_insertion_path', None), getattr(self,
+                '_sys_insertion_tdir', None)
+        if ip in sys.path:
+            sys.path.remove(ip)
+        if hasattr(it, '__exit__'):
+            it.__exit__(*args)
+


 class FileTypePlugin(Plugin):
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -103,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
            mi.cover_data = (ext.lower(), data)
        return mi

+class CHMMetadataReader(MetadataReaderPlugin):
+
+    name        = 'Read CHM metadata'
+    file_types  = set(['chm'])
+    description = _('Read metadata from %s files') % 'CHM'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.chm import get_metadata
+        return get_metadata(stream)
+
+
 class EPUBMetadataReader(MetadataReaderPlugin):

    name        = 'Read EPUB metadata'
@ -384,6 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.tcr.input import TCRInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lrf.input import LRFInput
+from calibre.ebooks.chm.input import CHMInput

 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.fb2.output import FB2Output
@ -444,6 +456,7 @@ plugins += [
    TCRInput,
    TXTInput,
    LRFInput,
+    CHMInput,
 ]
 plugins += [
    EPUBOutput,
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -401,7 +401,7 @@ def initialize_plugins():
                plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
            except PluginNotFound:
                continue
-            plugin = initialize_plugin(plugin, zfp if not isinstance(zfp, type) else zfp)
+            plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
            _initialized_plugins.append(plugin)
        except:
            print 'Failed to initialize plugin...'
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -23,6 +23,8 @@ Run an embedded python interpreter.
                      help='Debug the specified device driver.')
    parser.add_option('-g', '--gui',  default=False, action='store_true',
                      help='Run the GUI',)
+    parser.add_option('-w', '--viewer',  default=False, action='store_true',
+                      help='Run the ebook viewer',)
    parser.add_option('--paths', default=False, action='store_true',
            help='Output the paths necessary to setup the calibre environment')
    parser.add_option('--migrate', action='store_true', default=False,
@ -98,6 +100,12 @@ def main(args=sys.argv):
    if opts.gui:
        from calibre.gui2.main import main
        main(['calibre'])
+    elif opts.viewer:
+        from calibre.gui2.viewer.main import main
+        vargs = ['ebook-viewer', '--debug-javascript']
+        if len(args) > 1:
+            vargs.append(args[-1])
+        main(vargs)
    elif opts.command:
        sys.argv = args[:1]
        exec opts.command
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -15,7 +15,7 @@ class ANDROID(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
-    FORMATS     = ['epub']
+    FORMATS     = ['epub', 'pdf']

    VENDOR_ID   = {
            0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
--- a/src/calibre/devices/libusb.py
+++ b/src/calibre/devices/libusb.py
@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
                   c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
 from errno import EBUSY, ENOMEM

-from calibre import iswindows, isosx, load_library
+from calibre import iswindows, isosx, isfreebsd, load_library

 _libusb_name = 'libusb'
-PATH_MAX = 511 if iswindows else 1024 if isosx else 4096
+PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
 if iswindows:
    class Structure(_Structure):
        _pack_ = 1
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -398,16 +398,6 @@ class Device(DeviceConfig, DevicePlugin):
        if len(matches) > 2:
            drives['cardb'] = matches[2]

-        pat = self.OSX_MAIN_MEM_VOL_PAT
-        if pat is not None and len(drives) > 1 and 'main' in drives:
-            if pat.search(drives['main']) is None:
-                main = drives['main']
-                for x in ('carda', 'cardb'):
-                    if x in drives and pat.search(drives[x]):
-                        drives['main'] = drives.pop(x)
-                        drives[x] = main
-                        break
-
        return drives

    def osx_bsd_names(self):
@ -431,6 +421,16 @@ class Device(DeviceConfig, DevicePlugin):
        if drives['main'] is None:
            print bsd_drives, mount_map, drives
            raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
+        pat = self.OSX_MAIN_MEM_VOL_PAT
+        if pat is not None and len(drives) > 1 and 'main' in drives:
+            if pat.search(drives['main']) is None:
+                main = drives['main']
+                for x in ('carda', 'cardb'):
+                    if x in drives and pat.search(drives[x]):
+                        drives['main'] = drives.pop(x)
+                        drives[x] = main
+                        break
+
        self._main_prefix = drives['main']+os.sep
        def get_card_prefix(c):
            ans = drives.get(c, None)
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@ -1,213 +1,17 @@
-from __future__ import with_statement
 ''' CHM File decoding support '''
 __license__ = 'GPL v3'
 __copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                 ' and Alex Bramley <a.bramley at gmail.com>.'

-import os, shutil, uuid, re
-from tempfile import mkdtemp
-from mimetypes import guess_type as guess_mimetype
+import os, uuid

-from BeautifulSoup import BeautifulSoup, NavigableString
 from lxml import html
-from pychm.chm import CHMFile
-from pychm.chmlib import (
-  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
-  chm_enumerate,
-)

-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.utils.config import OptionParser
-from calibre.ebooks.metadata.toc import TOC
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename

-
-def match_string(s1, s2_already_lowered):
-    if s1 is not None and s2_already_lowered is not None:
-        if s1.lower()==s2_already_lowered:
-            return True
-    return False
-
-def check_all_prev_empty(tag):
-    if tag is None:
-        return True
-    if tag.__class__ == NavigableString and not check_empty(tag):
-        return False
-    return check_all_prev_empty(tag.previousSibling)
-
-def check_empty(s, rex = re.compile(r'\S')):
-    return rex.search(s) is None
-
-
-def option_parser():
-    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
-    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
-    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
-    parser.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help=_("Set the book title"))
-    parser.add_option('--title-sort', action='store', type='string', default=None,
-                      dest='title_sort', help=_('Set sort key for the title'))
-    parser.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help=_("Set the author"))
-    parser.add_option('--author-sort', action='store', type='string', default=None,
-                      dest='author_sort', help=_('Set sort key for the author'))
-    parser.add_option("-c", "--category", action="store", type="string", \
-                    dest="category", help=_("The category this book belongs"
-                    " to. E.g.: History"))
-    parser.add_option("--thumbnail", action="store", type="string", \
-                    dest="thumbnail", help=_("Path to a graphic that will be"
-                    " set as this files' thumbnail"))
-    parser.add_option("--comment", action="store", type="string", \
-                    dest="freetext", help=_("Path to a txt file containing a comment."))
-    parser.add_option("--get-thumbnail", action="store_true", \
-                    dest="get_thumbnail", default=False, \
-                    help=_("Extract thumbnail from LRF file"))
-    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
-    parser.add_option('--classification', default=None, help=_('Set the book classification'))
-    parser.add_option('--creator', default=None, help=_('Set the book creator'))
-    parser.add_option('--producer', default=None, help=_('Set the book producer'))
-    parser.add_option('--get-cover', action='store_true', default=False,
-                      help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
-    parser.add_option('--bookid', action='store', type='string', default=None,
-                      dest='book_id', help=_('Set book ID'))
-    parser.add_option('--font-delta', action='store', type='int', default=0,
-                      dest='font_delta', help=_('Set font delta'))
-    return parser
-
-class CHMError(Exception):
-    pass
-
-class CHMReader(CHMFile):
-    def __init__(self, input, log):
-        CHMFile.__init__(self)
-        if not self.LoadCHM(input):
-            raise CHMError("Unable to open CHM file '%s'"%(input,))
-        self.log = log
-        self._sourcechm = input
-        self._contents = None
-        self._playorder = 0
-        self._metadata = False
-        self._extracted = False
-
-        # location of '.hhc' file, which is the CHM TOC.
-        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
-        self.hhc_path = self.root + ".hhc"
-
-
-    def _parse_toc(self, ul, basedir=os.getcwdu()):
-        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
-        self._playorder += 1
-        for li in ul('li', recursive=False):
-            href = li.object('param', {'name': 'Local'})[0]['value']
-            if href.count('#'):
-                href, frag = href.split('#')
-            else:
-                frag = None
-            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
-            #print "========>", name
-            toc.add_item(href, frag, name, play_order=self._playorder)
-            self._playorder += 1
-            if li.ul:
-               child = self._parse_toc(li.ul)
-               child.parent = toc
-               toc.append(child)
-        #print toc
-        return toc
-
-
-    def GetFile(self, path):
-        # have to have abs paths for ResolveObject, but Contents() deliberately
-        # makes them relative. So we don't have to worry, re-add the leading /.
-        # note this path refers to the internal CHM structure
-        if path[0] != '/':
-            path = '/' + path
-        res, ui = self.ResolveObject(path)
-        if res != CHM_RESOLVE_SUCCESS:
-            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
-        size, data = self.RetrieveObject(ui)
-        if size == 0:
-            raise CHMError("'%s' is zero bytes in length!"%(path,))
-        return data
-
-    def ExtractFiles(self, output_dir=os.getcwdu()):
-        for path in self.Contents():
-            lpath = os.path.join(output_dir, path)
-            self._ensure_dir(lpath)
-            data = self.GetFile(path)
-            with open(lpath, 'wb') as f:
-                if guess_mimetype(path)[0] == ('text/html'):
-                    data = self._reformat(data)
-                f.write(data)
-        #subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
-        self._extracted = True
-
-    def _reformat(self, data):
-        try:
-            soup = BeautifulSoup(data)
-        except UnicodeEncodeError:
-            # hit some strange encoding problems...
-            print "Unable to parse html for cleaning, leaving it :("
-            return data
-        # nuke javascript...
-        [s.extract() for s in soup('script')]
-        # remove forward and back nav bars from the top/bottom of each page
-        # cos they really fuck with the flow of things and generally waste space
-        # since we can't use [a,b] syntax to select arbitrary items from a list
-        # we'll have to do this manually...
-        t = soup('table')
-        if t:
-            if (t[0].previousSibling is None
-              or t[0].previousSibling.previousSibling is None):
-                t[0].extract()
-            if (t[-1].nextSibling is None
-              or t[-1].nextSibling.nextSibling is None):
-                t[-1].extract()
-        # for some very odd reason each page's content appears to be in a table
-        # too. and this table has sub-tables for random asides... grr.
-
-        # remove br at top of page if present after nav bars removed
-        br = soup('br')
-        if br:
-            if check_all_prev_empty(br[0].previousSibling):
-                br[0].extract()
-
-        # some images seem to be broken in some chm's :/
-        for img in soup('img'):
-            try:
-                # some are supposedly "relative"... lies.
-                while img['src'].startswith('../'): img['src'] = img['src'][3:]
-                # some have ";<junk>" at the end.
-                img['src'] = img['src'].split(';')[0]
-            except KeyError:
-                # and some don't even have a src= ?!
-                pass
-        # now give back some pretty html.
-        return soup.prettify()
-
-    def Contents(self):
-        if self._contents is not None:
-            return self._contents
-        paths = []
-        def get_paths(chm, ui, ctx):
-            # skip directories
-            # note this path refers to the internal CHM structure
-            if ui.path[-1] != '/':
-                # and make paths relative
-                paths.append(ui.path.lstrip('/'))
-        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
-        self._contents = paths
-        return self._contents
-
-    def _ensure_dir(self, path):
-        dir = os.path.dirname(path)
-        if not os.path.isdir(dir):
-            os.makedirs(dir)
-
-    def extract_content(self, output_dir=os.getcwdu()):
-        self.ExtractFiles(output_dir=output_dir)
-
-
 class CHMInput(InputFormatPlugin):

    name        = 'CHM Input'
@ -215,12 +19,8 @@ class CHMInput(InputFormatPlugin):
    description = 'Convert CHM files to OEB'
    file_types  = set(['chm'])

-    options = set([
-        OptionRecommendation(name='dummy_option', recommended_value=False,
-            help=_('dummy option until real options are determined.')),
-    ])
-
    def _chmtohtml(self, output_dir, chm_path, no_images, log):
+        from calibre.ebooks.chm.reader import CHMReader
        log.debug('Opening CHM file')
        rdr = CHMReader(chm_path, log)
        log.debug('Extracting CHM to %s' % output_dir)
@ -230,37 +30,36 @@ class CHMInput(InputFormatPlugin):

    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.metadata.chm import get_metadata_
+        from calibre.customize.ui import plugin_for_input_format

        log.debug('Processing CHM...')
-        tdir = mkdtemp(prefix='chm2oeb_')
-        from calibre.customize.ui import plugin_for_input_format
-        html_input = plugin_for_input_format('html')
-        for opt in html_input.options:
-            setattr(options, opt.option.name, opt.recommended_value)
-        options.input_encoding = 'utf-8'
-        no_images = False #options.no_images
-        chm_name = stream.name
-        #chm_data = stream.read()
+        with TemporaryDirectory('chm2oeb') as tdir:
+            html_input = plugin_for_input_format('html')
+            for opt in html_input.options:
+                setattr(options, opt.option.name, opt.recommended_value)
+            options.input_encoding = 'utf-8'
+            no_images = False #options.no_images
+            chm_name = stream.name
+            #chm_data = stream.read()

-        #closing stream so CHM can be opened by external library
-        stream.close()
-        log.debug('tdir=%s' % tdir)
-        log.debug('stream.name=%s' % stream.name)
-        mainname = self._chmtohtml(tdir, chm_name, no_images, log)
-        mainpath = os.path.join(tdir, mainname)
+            #closing stream so CHM can be opened by external library
+            stream.close()
+            log.debug('tdir=%s' % tdir)
+            log.debug('stream.name=%s' % stream.name)
+            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
+            mainpath = os.path.join(tdir, mainname)

-        metadata = get_metadata_(tdir)
+            metadata = get_metadata_(tdir)

-        odi = options.debug_pipeline
-        options.debug_pipeline = None
-        # try a custom conversion:
-        #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
-        # try using html converter:
-        htmlpath = self._create_html_root(mainpath, log)
-        oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
-        options.debug_pipeline = odi
-        #log.debug('DEBUG: Not removing tempdir %s' % tdir)
-        shutil.rmtree(tdir)
+            odi = options.debug_pipeline
+            options.debug_pipeline = None
+            # try a custom conversion:
+            #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
+            # try using html converter:
+            htmlpath = self._create_html_root(mainpath, log)
+            oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
+            options.debug_pipeline = odi
+            #log.debug('DEBUG: Not removing tempdir %s' % tdir)
        return oeb

    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
@ -369,6 +168,8 @@ class CHMInput(InputFormatPlugin):
        # check that node is a normal node (not a comment, DOCTYPE, etc.)
        # (normal nodes have string tags)
        if isinstance(node.tag, basestring):
+            from calibre.ebooks.chm.reader import match_string
+
            if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
                for child in node:
                    if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -0,0 +1,207 @@
+from __future__ import with_statement
+''' CHM File decoding support '''
+__license__ = 'GPL v3'
+__copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
+                 ' and Alex Bramley <a.bramley at gmail.com>.'
+
+import os, re
+from mimetypes import guess_type as guess_mimetype
+
+from BeautifulSoup import BeautifulSoup, NavigableString
+
+from calibre.utils.chm.chm import CHMFile
+from calibre.utils.chm.chmlib import (
+  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
+  chm_enumerate,
+)
+
+from calibre.utils.config import OptionParser
+from calibre.ebooks.metadata.toc import TOC
+
+
+def match_string(s1, s2_already_lowered):
+    if s1 is not None and s2_already_lowered is not None:
+        if s1.lower()==s2_already_lowered:
+            return True
+    return False
+
+def check_all_prev_empty(tag):
+    if tag is None:
+        return True
+    if tag.__class__ == NavigableString and not check_empty(tag):
+        return False
+    return check_all_prev_empty(tag.previousSibling)
+
+def check_empty(s, rex = re.compile(r'\S')):
+    return rex.search(s) is None
+
+
+def option_parser():
+    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
+    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
+    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
+    parser.add_option("-t", "--title", action="store", type="string", \
+                    dest="title", help=_("Set the book title"))
+    parser.add_option('--title-sort', action='store', type='string', default=None,
+                      dest='title_sort', help=_('Set sort key for the title'))
+    parser.add_option("-a", "--author", action="store", type="string", \
+                    dest="author", help=_("Set the author"))
+    parser.add_option('--author-sort', action='store', type='string', default=None,
+                      dest='author_sort', help=_('Set sort key for the author'))
+    parser.add_option("-c", "--category", action="store", type="string", \
+                    dest="category", help=_("The category this book belongs"
+                    " to. E.g.: History"))
+    parser.add_option("--thumbnail", action="store", type="string", \
+                    dest="thumbnail", help=_("Path to a graphic that will be"
+                    " set as this files' thumbnail"))
+    parser.add_option("--comment", action="store", type="string", \
+                    dest="freetext", help=_("Path to a txt file containing a comment."))
+    parser.add_option("--get-thumbnail", action="store_true", \
+                    dest="get_thumbnail", default=False, \
+                    help=_("Extract thumbnail from LRF file"))
+    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
+    parser.add_option('--classification', default=None, help=_('Set the book classification'))
+    parser.add_option('--creator', default=None, help=_('Set the book creator'))
+    parser.add_option('--producer', default=None, help=_('Set the book producer'))
+    parser.add_option('--get-cover', action='store_true', default=False,
+                      help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
+    parser.add_option('--bookid', action='store', type='string', default=None,
+                      dest='book_id', help=_('Set book ID'))
+    parser.add_option('--font-delta', action='store', type='int', default=0,
+                      dest='font_delta', help=_('Set font delta'))
+    return parser
+
+class CHMError(Exception):
+    pass
+
+class CHMReader(CHMFile):
+    def __init__(self, input, log):
+        CHMFile.__init__(self)
+        if not self.LoadCHM(input):
+            raise CHMError("Unable to open CHM file '%s'"%(input,))
+        self.log = log
+        self._sourcechm = input
+        self._contents = None
+        self._playorder = 0
+        self._metadata = False
+        self._extracted = False
+
+        # location of '.hhc' file, which is the CHM TOC.
+        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
+        self.hhc_path = self.root + ".hhc"
+
+
+    def _parse_toc(self, ul, basedir=os.getcwdu()):
+        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
+        self._playorder += 1
+        for li in ul('li', recursive=False):
+            href = li.object('param', {'name': 'Local'})[0]['value']
+            if href.count('#'):
+                href, frag = href.split('#')
+            else:
+                frag = None
+            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
+            #print "========>", name
+            toc.add_item(href, frag, name, play_order=self._playorder)
+            self._playorder += 1
+            if li.ul:
+               child = self._parse_toc(li.ul)
+               child.parent = toc
+               toc.append(child)
+        #print toc
+        return toc
+
+
+    def GetFile(self, path):
+        # have to have abs paths for ResolveObject, but Contents() deliberately
+        # makes them relative. So we don't have to worry, re-add the leading /.
+        # note this path refers to the internal CHM structure
+        if path[0] != '/':
+            path = '/' + path
+        res, ui = self.ResolveObject(path)
+        if res != CHM_RESOLVE_SUCCESS:
+            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
+        size, data = self.RetrieveObject(ui)
+        if size == 0:
+            raise CHMError("'%s' is zero bytes in length!"%(path,))
+        return data
+
+    def ExtractFiles(self, output_dir=os.getcwdu()):
+        for path in self.Contents():
+            lpath = os.path.join(output_dir, path)
+            self._ensure_dir(lpath)
+            data = self.GetFile(path)
+            with open(lpath, 'wb') as f:
+                if guess_mimetype(path)[0] == ('text/html'):
+                    data = self._reformat(data)
+                f.write(data)
+        #subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
+        self._extracted = True
+
+    def _reformat(self, data):
+        try:
+            soup = BeautifulSoup(data)
+        except UnicodeEncodeError:
+            # hit some strange encoding problems...
+            print "Unable to parse html for cleaning, leaving it :("
+            return data
+        # nuke javascript...
+        [s.extract() for s in soup('script')]
+        # remove forward and back nav bars from the top/bottom of each page
+        # cos they really fuck with the flow of things and generally waste space
+        # since we can't use [a,b] syntax to select arbitrary items from a list
+        # we'll have to do this manually...
+        t = soup('table')
+        if t:
+            if (t[0].previousSibling is None
+              or t[0].previousSibling.previousSibling is None):
+                t[0].extract()
+            if (t[-1].nextSibling is None
+              or t[-1].nextSibling.nextSibling is None):
+                t[-1].extract()
+        # for some very odd reason each page's content appears to be in a table
+        # too. and this table has sub-tables for random asides... grr.
+
+        # remove br at top of page if present after nav bars removed
+        br = soup('br')
+        if br:
+            if check_all_prev_empty(br[0].previousSibling):
+                br[0].extract()
+
+        # some images seem to be broken in some chm's :/
+        for img in soup('img'):
+            try:
+                # some are supposedly "relative"... lies.
+                while img['src'].startswith('../'): img['src'] = img['src'][3:]
+                # some have ";<junk>" at the end.
+                img['src'] = img['src'].split(';')[0]
+            except KeyError:
+                # and some don't even have a src= ?!
+                pass
+        # now give back some pretty html.
+        return soup.prettify()
+
+    def Contents(self):
+        if self._contents is not None:
+            return self._contents
+        paths = []
+        def get_paths(chm, ui, ctx):
+            # skip directories
+            # note this path refers to the internal CHM structure
+            if ui.path[-1] != '/':
+                # and make paths relative
+                paths.append(ui.path.lstrip('/'))
+        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
+        self._contents = paths
+        return self._contents
+
+    def _ensure_dir(self, path):
+        dir = os.path.dirname(path)
+        if not os.path.isdir(dir):
+            os.makedirs(dir)
+
+    def extract_content(self, output_dir=os.getcwdu()):
+        self.ExtractFiles(output_dir=output_dir)
+
+
+
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -731,7 +731,8 @@ OptionRecommendation(name='timestamp',
            zf = ZipFile(os.path.join(self.opts.debug_pipeline,
                'periodical.downloaded_recipe'), 'w')
            zf.add_dir(out_dir)
-            self.input_plugin.save_download(zf)
+            with self.input_plugin:
+                self.input_plugin.save_download(zf)
            zf.close()

        self.log.info('Input debug saved to:', out_dir)
@ -780,28 +781,29 @@ OptionRecommendation(name='timestamp',
        self.ui_reporter(0.01, _('Converting input to HTML...'))
        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
        self.input_plugin.report_progress = ir
-        self.oeb = self.input_plugin(stream, self.opts,
-                                    self.input_fmt, self.log,
-                                    accelerators, tdir)
-        if self.opts.debug_pipeline is not None:
-            self.dump_input(self.oeb, tdir)
-            if self.abort_after_input_dump:
-                return
-        if self.input_fmt in ('recipe', 'downloaded_recipe'):
-            self.opts_to_mi(self.user_metadata)
-        if not hasattr(self.oeb, 'manifest'):
-            self.oeb = create_oebbook(self.log, self.oeb, self.opts,
-                    self.input_plugin)
-        self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
-        self.opts.is_image_collection = self.input_plugin.is_image_collection
-        pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
-        self.flush()
-        if self.opts.debug_pipeline is not None:
-            out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
-            self.dump_oeb(self.oeb, out_dir)
-            self.log('Parsed HTML written to:', out_dir)
-        self.input_plugin.specialize(self.oeb, self.opts, self.log,
-                self.output_fmt)
+        with self.input_plugin:
+            self.oeb = self.input_plugin(stream, self.opts,
+                                        self.input_fmt, self.log,
+                                        accelerators, tdir)
+            if self.opts.debug_pipeline is not None:
+                self.dump_input(self.oeb, tdir)
+                if self.abort_after_input_dump:
+                    return
+            if self.input_fmt in ('recipe', 'downloaded_recipe'):
+                self.opts_to_mi(self.user_metadata)
+            if not hasattr(self.oeb, 'manifest'):
+                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
+                        self.input_plugin)
+            self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
+            self.opts.is_image_collection = self.input_plugin.is_image_collection
+            pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
+            self.flush()
+            if self.opts.debug_pipeline is not None:
+                out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
+                self.dump_oeb(self.oeb, out_dir)
+                self.log('Parsed HTML written to:', out_dir)
+            self.input_plugin.specialize(self.oeb, self.opts, self.log,
+                    self.output_fmt)

        pr(0., _('Running transforms on ebook...'))

@ -891,7 +893,8 @@ OptionRecommendation(name='timestamp',
        our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
        self.output_plugin.report_progress = our
        our(0., _('Creating')+' %s'%self.output_plugin.name)
-        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
+        with self.output_plugin:
+            self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
                self.opts, self.log)
        self.ui_reporter(1.)
        run_plugins_on_postprocess(self.output, self.output_fmt)
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -157,11 +157,9 @@ class EPUBOutput(OutputFormatPlugin):

        self.workaround_ade_quirks()
        self.workaround_webkit_quirks()
-        self.workaround_sony_quirks()
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages()(oeb, opts)

-
        from calibre.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                max_flow_size=self.opts.flow_size*1024
@ -170,6 +168,8 @@ class EPUBOutput(OutputFormatPlugin):

        self.insert_cover()

+        self.workaround_sony_quirks()
+
        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux
+from calibre.constants import islinux, isfreebsd
 from calibre import unicode_path
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@ -346,7 +346,7 @@ class HTMLInput(InputFormatPlugin):
        self.added_resources = {}
        self.log = log
        for path, href in htmlfile_map.items():
-            if not islinux:
+            if not (islinux or isfreebsd):
                path = path.lower()
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
@ -417,7 +417,7 @@ class HTMLInput(InputFormatPlugin):
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
-        if not islinux:
+        if not (islinux or isfreebsd):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@ -215,6 +215,28 @@ def merge_results(one, two):
        else:
            one[idx].smart_update(x)

+class MetadataSources(object):
+
+    def __init__(self, sources):
+        self.sources = sources
+
+    def __enter__(self):
+        for s in self.sources:
+            s.__enter__()
+        return self
+
+    def __exit__(self, *args):
+        for s in self.sources:
+            s.__exit__()
+
+    def __call__(self, *args, **kwargs):
+        for s in self.sources:
+            s(*args, **kwargs)
+
+    def join(self):
+        for s in self.sources:
+            s.join()
+
 def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
           verbose=0):
    assert not(title is None and author is None and publisher is None and \
@ -224,11 +246,10 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
    if isbn is not None:
        isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
    fetchers = list(metadata_sources(isbndb_key=isbndb_key))
+    with MetadataSources(fetchers) as manager:
+        manager(title, author, publisher, isbn, verbose)
+        manager.join()

-    for fetcher in fetchers:
-        fetcher(title, author, publisher, isbn, verbose)
-    for fetcher in fetchers:
-        fetcher.join()
    results = list(fetchers[0].results)
    for fetcher in fetchers[1:]:
        merge_results(results, fetcher.results)
@ -243,10 +264,9 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
 def get_social_metadata(mi, verbose=0):
    from calibre.customize.ui import metadata_sources
    fetchers = list(metadata_sources(metadata_type='social'))
-    for fetcher in fetchers:
-        fetcher(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
-    for fetcher in fetchers:
-        fetcher.join()
+    with MetadataSources(fetchers) as manager:
+        manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
+        manager.join()
    ratings, tags, comments = [], set([]), set([])
    for fetcher in fetchers:
        if fetcher.results:
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -4,13 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''

-import functools
-import os
-import re
-import struct
-import textwrap
-import cStringIO
-import sys
+import functools, shutil, os, re, struct, textwrap, cStringIO, sys

 try:
    from PIL import Image as PILImage
@ -620,6 +614,16 @@ class MobiReader(object):
                * opf.cover.split('/'))):
                opf.cover = None

+        cover = opf.cover
+        if cover is not None:
+            cover = cover.replace('/', os.sep)
+            if os.path.exists(cover):
+                ncover = 'images'+os.sep+'calibre_cover.jpg'
+                if os.path.exists(ncover):
+                    os.remove(ncover)
+                shutil.copyfile(cover, ncover)
+            opf.cover = ncover.replace(os.sep, '/')
+
        manifest = [(htmlfile, 'application/xhtml+xml'),
            (os.path.abspath('styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -173,7 +173,8 @@ class EbookIterator(object):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
-        self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
+        with plumber.input_plugin:
+            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
                plumber.opts, plumber.input_fmt, self.log,
                {}, self.base)

--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@ -13,7 +13,7 @@ from functools import partial

 from calibre.ebooks import ConversionError, DRMError
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre import isosx, iswindows, islinux
+from calibre import isosx, iswindows, islinux, isfreebsd
 from calibre import CurrentDir

 PDFTOHTML = 'pdftohtml'
@ -23,7 +23,7 @@ if isosx and hasattr(sys, 'frameworks_dir'):
 if iswindows and hasattr(sys, 'frozen'):
    PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
    popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
-if islinux and getattr(sys, 'frozen_path', False):
+if (islinux or isfreebsd) and getattr(sys, 'frozen_path', False):
    PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')

 def pdftohtml(output_dir, pdf_path, no_images):
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@ -142,7 +142,7 @@ class RTFMLizer(object):
    def image_to_hexstring(self, data):
        im = Image.open(cStringIO.StringIO(data))
        data = cStringIO.StringIO()
-        im.save(data, 'JPEG')
+        im.convert('RGB').save(data, 'JPEG')
        data = data.getvalue()

        raw_hex = ''
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -12,7 +12,7 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \

 ORG_NAME = 'KovidsBrain'
 APP_UID  = 'libprs500'
-from calibre import islinux, iswindows, isosx
+from calibre import islinux, iswindows, isosx, isfreebsd
 from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
 from calibre.utils.localization import set_qt_translator
 from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
@ -579,7 +579,7 @@ _store_app = None

 def is_ok_to_use_qt():
    global gui_thread, _store_app
-    if islinux and ':' not in os.environ.get('DISPLAY', ''):
+    if (islinux or isfreebsd) and ':' not in os.environ.get('DISPLAY', ''):
        return False
    if _store_app is None and QApplication.instance() is None:
        _store_app = QApplication([])
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -25,7 +25,7 @@ from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, \
        authors_to_string, check_isbn
 from calibre.ebooks.metadata.library_thing import cover_from_isbn
-from calibre import islinux
+from calibre import islinux, isfreebsd
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.utils.config import prefs, tweaks
 from calibre.utils.date import qt_to_dt
@ -389,8 +389,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        height_of_rest = self.frameGeometry().height() - self.cover.height()
        width_of_rest  = self.frameGeometry().width() - self.cover.width()
        ag = QCoreApplication.instance().desktop().availableGeometry(self)
-        self.cover.MAX_HEIGHT = ag.height()-(25 if islinux else 0)-height_of_rest
-        self.cover.MAX_WIDTH = ag.width()-(25 if islinux else 0)-width_of_rest
+        self.cover.MAX_HEIGHT = ag.height()-(25 if (islinux or isfreebsd) else 0)-height_of_rest
+        self.cover.MAX_WIDTH = ag.width()-(25 if (islinux or isfreebsd) else 0)-width_of_rest
        if cover:
            pm = QPixmap()
            pm.loadFromData(cover)
--- a/src/calibre/gui2/lrf_renderer/main.py
+++ b/src/calibre/gui2/lrf_renderer/main.py
@ -5,7 +5,7 @@ import sys, logging, os, traceback, time
 from PyQt4.QtGui import QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon
 from PyQt4.QtCore import Qt, QObject, SIGNAL, QCoreApplication, QThread

-from calibre import __appname__, setup_cli_handlers, islinux
+from calibre import __appname__, setup_cli_handlers, islinux, isfreebsd
 from calibre.ebooks.lrf.lrfparser import LRFDocument

 from calibre.gui2 import ORG_NAME, APP_UID, error_dialog, \
@ -258,7 +258,7 @@ def file_renderer(stream, opts, parent=None, logger=None):
        level = logging.DEBUG if opts.verbose else logging.INFO
        logger = logging.getLogger('lrfviewer')
        setup_cli_handlers(logger, level)
-    if islinux:
+    if islinux or isfreebsd:
        try: # Set lrfviewer as the default for LRF files for this user
            from subprocess import call
            call('xdg-mime default calibre-lrfviewer.desktop application/lrf', shell=True)
@ -307,7 +307,7 @@ def main(args=sys.argv, logger=None):
    if hasattr(opts, 'help'):
        parser.print_help()
        return 1
-    pid = os.fork() if islinux else -1
+    pid = os.fork() if (islinux or isfreebsd) else -1
    if pid <= 0:
        app = Application(args)
        app.setWindowIcon(QIcon(I('viewer.svg')))
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -20,7 +20,7 @@ from calibre.gui2 import Application, ORG_NAME, APP_UID, choose_files, \
                         info_dialog, error_dialog
 from calibre.ebooks.oeb.iterator import EbookIterator
 from calibre.ebooks import DRMError
-from calibre.constants import islinux
+from calibre.constants import islinux, isfreebsd
 from calibre.utils.config import Config, StringConfig, dynamic
 from calibre.gui2.search_box import SearchBox2
 from calibre.ebooks.metadata import MetaInformation
@ -686,7 +686,7 @@ View an ebook.
 def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
-    pid = os.fork() if False and islinux else -1
+    pid = os.fork() if False and (islinux or isfreebsd) else -1
    if pid <= 0:
        app = Application(args)
        app.setWindowIcon(QIcon(I('viewer.svg')))
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1294,7 +1294,7 @@ class EPUB_MOBI(CatalogPlugin):

                                if entry_type:
                                    user_notes[location] = dict(type=entry_type, id=self.id,
-                                                                      text=data[eo+8:eo+8+rec_len].decode('utf-16-be'))
+                                                                text=data[eo+8:eo+8+rec_len].decode('utf-16-be'))
                                    #print " %2d: %s %s" % (current_entry, entry_type,'at %d' % location if location else '')
                                #if current_block == 'text_block':
                                    #self.textdump(text)
@ -1307,12 +1307,17 @@ class EPUB_MOBI(CatalogPlugin):
                            while sig == 'BKMK':
                                # Fix start location for Highlights using BKMK data
                                end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
-                                #print "looking for end_loc %d in BKMK" % end_loc
                                if end_loc in user_notes and user_notes[end_loc]['type'] != 'Note':
                                    start, = unpack('>I', data[eo+8:eo+12])
                                    user_notes[start] = user_notes[end_loc]
                                    user_notes.pop(end_loc)
                                    #print "changing start location of %d to %d" % (end_loc,start)
+                                else:
+                                    # If a bookmark coincides with a user annotation, the locs could
+                                    # be the same - cheat by nudging -1
+                                    # Skip bookmark for last_read_location
+                                    if end_loc != self.last_read_location:
+                                        user_notes[end_loc - 1] = dict(type='Bookmark',id=self.id,text=None)
                                rec_len, = unpack('>I', data[eo+4:eo+8])
                                eo += rec_len + 8
                                sig = data[eo:eo+4]
@ -3361,7 +3366,7 @@ class EPUB_MOBI(CatalogPlugin):
            self.ncxSoup = ncx_soup

        def updateLibraryComments(self):
-            # Push user notes back to library
+            # Append user notes to library book['comments'], catalog book['description']
            from calibre.library.cli import send_message as calibre_send_message

            if self.bookmarked_books:
@ -3395,22 +3400,30 @@ class EPUB_MOBI(CatalogPlugin):
                        user_notes = self.bookmarked_books[id][0].user_notes
                        annotations = []

-                        '''
-                        spanTag = Tag(ka_soup, 'span')
-                        spanTag['style'] = 'font-style:italic;font-weight:bold'
-                        spanTag.insert(0,NavigableString("Kindle Annotations"))
-                        divTag.insert(dtc, spanTag)
-                        dtc += 1
-                        divTag.insert(dtc, Tag(ka_soup,'br'))
-                        dtc += 1
-                        '''
+                        if False:
+                            spanTag = Tag(ka_soup, 'span')
+                            spanTag['style'] = 'font-style:italic;font-weight:bold;text-align:right'
+                            spanTag.insert(0,NavigableString("Kindle Annotations"))
+                            divTag.insert(dtc, spanTag)
+                            dtc += 1
+                            divTag.insert(dtc, Tag(ka_soup,'br'))
+                            dtc += 1

                        # Add the annotations sorted by location
+                        # Italicize highlighted text
                        for location in sorted(user_notes):
-                            annotations.append('<b>Location %d &bull; %s</b><br />%s<br />' % \
-                                                self.magicKindleLocationCalculator(location),
-                                                user_notes[location]['type'],
-                                                user_notes[location]['text'])
+                            if user_notes[location]['text']:
+                                annotations.append('<b>Location %d &bull; %s</b><br />%s<br />' % \
+                                                    (self.magicKindleLocationCalculator(location),
+                                                     user_notes[location]['type'],
+                                                     user_notes[location]['text'] if \
+                                                        user_notes[location]['type'] == 'Note' else \
+                                                        '<i>%s</i>' % user_notes[location]['text']))
+                            else:
+                                annotations.append('<b>Location %d &bull; %s</b><br />' % \
+                                                    (self.magicKindleLocationCalculator(location),
+                                                     user_notes[location]['type']))
+
                        for annotation in annotations:
                            divTag.insert(dtc, annotation)
                            dtc += 1
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -196,6 +196,8 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
        for x in data:
            if isinstance(x['fmt_epub'], unicode):
                x['fmt_epub'] = x['fmt_epub'].encode('utf-8')
+            if isinstance(x['cover'], unicode):
+                x['cover'] = x['cover'].encode('utf-8')
        template = MarkupTemplate(STANZA_TEMPLATE)
        return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
                sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -263,7 +263,7 @@ class ResultCache(SearchQueryParser):
                                if item[loc].strip() != '':
                                    continue
                            matches.add(item[0])
-                            break
+                            continue
                        continue    ### item is empty. No possible matches below

                    if q == 'true':
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -7,6 +7,7 @@ import sys, os, shutil, cPickle, textwrap, stat
 from subprocess import check_call

 from calibre import  __appname__, prints
+from calibre.constants import islinux, isfreebsd


 entry_points = {
@ -128,20 +129,23 @@ class PostInstall:
        self.icon_resources = []
        self.menu_resources = []
        self.mime_resources = []
-        self.setup_completion()
-        self.setup_udev_rules()
+        if islinux:
+            self.setup_completion()
+            self.setup_udev_rules()
        self.install_man_pages()
-        self.setup_desktop_integration()
+        if islinux:
+            self.setup_desktop_integration()
        self.create_uninstaller()

        from calibre.utils.config import config_dir
        if os.path.exists(config_dir):
            os.chdir(config_dir)
-            for f in os.listdir('.'):
-                if os.stat(f).st_uid == 0:
-                    os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
-            if os.stat(config_dir).st_uid == 0:
-                os.rmdir(config_dir)
+            if islinux:
+                for f in os.listdir('.'):
+                    if os.stat(f).st_uid == 0:
+                        os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
+                if os.stat(config_dir).st_uid == 0:
+                    os.rmdir(config_dir)

        if warn is None and self.warnings:
            self.info('There were %d warnings'%len(self.warnings))
@ -318,7 +322,10 @@ class PostInstall:
    def install_man_pages(self):
        try:
            from calibre.utils.help2man import create_man_page
-            manpath = os.path.join(self.opts.staging_sharedir, 'man/man1')
+            if isfreebsd:
+                manpath = os.path.join(self.opts.staging_root, 'man/man1')
+            else:
+                manpath = os.path.join(self.opts.staging_sharedir, 'man/man1')
            if not os.path.exists(manpath):
                os.makedirs(manpath)
            self.info('Installing MAN pages...')
@ -331,7 +338,10 @@ class PostInstall:
                    continue
                parser = parser()
                raw = create_man_page(prog, parser)
-                manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
+                if isfreebsd:
+                    manfile = os.path.join(manpath, prog+'.1')
+                else:
+                    manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
                self.info('\tInstalling MAN page for', prog)
                open(manfile, 'wb').write(raw)
                self.manifest.append(manfile)
--- a/src/calibre/manual/develop.rst
+++ b/src/calibre/manual/develop.rst
@ -204,6 +204,10 @@ terminal. For example, you can start the GUI from the terminal as::

    calibre-debug -g

+Similarly, you can start the ebook-viewer as::
+
+    calibre-debug -w /path/to/file/to/be/viewed
+
 Executing arbitrary scripts in the calibre python environment
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

--- a/src/calibre/utils/chm/init.py
+++ b/src/calibre/utils/chm/init.py
@ -0,0 +1,34 @@
+## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
+
+## pychm is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+
+## You should have received a copy of the GNU General Public
+## License along with this program; see the file COPYING.  If not,
+## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+## Boston, MA 02111-1307, USA
+
+## $Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $
+
+'''
+   chm - A package to manipulate CHM files
+
+   The chm package provides four modules: chm, chmlib, extra and
+   _chmlib. _chmlib and chmlib are very low level libraries generated
+   from  SWIG interface files, and are simple wrappers around the API
+   defined by the C library chmlib.
+   The extra module adds full-text search support.
+   The chm module provides some higher level classes to simplify
+   access to the CHM files information.
+'''
+__all__ = ["chm", "chmlib", "_chmlib", "extra"]
+__version__ = "0.8.4"
+__revision__ = "$Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $"
+
--- a/src/calibre/utils/chm/chm.py
+++ b/src/calibre/utils/chm/chm.py
@ -0,0 +1,512 @@
+## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
+
+## Based on code by:
+## Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
+
+## pychm is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+
+## You should have received a copy of the GNU General Public
+## License along with this program; see the file COPYING.  If not,
+## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+## Boston, MA 02111-1307, USA
+
+## $Id: chm.py,v 1.12 2006/08/07 12:31:51 rubensr Exp $
+
+'''
+   chm - A high-level front end for the chmlib python module.
+
+   The chm module provides high level access to the functionality
+   included in chmlib. It encapsulates functions in the CHMFile class, and
+   provides some additional features, such as the ability to obtain
+   the contents tree of a CHM archive.
+
+'''
+
+import array
+import string
+import sys
+
+import calibre.utils.chm.chmlib as chmlib
+from calibre.constants import plugins
+
+extra, extra_err = plugins['chm_extra']
+if extra_err:
+    raise RuntimeError('Failed to load chm.extra: '+extra_err)
+
+charset_table = {
+    0   : 'iso8859_1',  # ANSI_CHARSET
+    238 : 'iso8859_2',  # EASTEUROPE_CHARSET
+    178 : 'iso8859_6',  # ARABIC_CHARSET
+    161 : 'iso8859_7',  # GREEK_CHARSET
+    177 : 'iso8859_8',  # HEBREW_CHARSET
+    162 : 'iso8859_9',  # TURKISH_CHARSET
+    222 : 'iso8859_11', # THAI_CHARSET - hmm not in python 2.2...
+    186 : 'iso8859_13', # BALTIC_CHARSET
+    204 : 'cp1251',     # RUSSIAN_CHARSET
+    255 : 'cp437',      # OEM_CHARSET
+    128 : 'cp932',      # SHIFTJIS_CHARSET
+    134 : 'cp936',      # GB2312_CHARSET
+    129 : 'cp949',      # HANGUL_CHARSET
+    136 : 'cp950',      # CHINESEBIG5_CHARSET
+    1   : None,         # DEFAULT_CHARSET
+    2   : None,         # SYMBOL_CHARSET
+    130 : None,         # JOHAB_CHARSET
+    163 : None,         # VIETNAMESE_CHARSET
+    77  : None,         # MAC_CHARSET
+}
+
+locale_table = {
+    0x0436 : ('iso8859_1', "Afrikaans", "Western Europe & US"),
+    0x041c : ('iso8859_2', "Albanian", "Central Europe"),
+    0x0401 : ('iso8859_6', "Arabic_Saudi_Arabia", "Arabic"),
+    0x0801 : ('iso8859_6', "Arabic_Iraq", "Arabic"),
+    0x0c01 : ('iso8859_6', "Arabic_Egypt", "Arabic"),
+    0x1001 : ('iso8859_6', "Arabic_Libya", "Arabic"),
+    0x1401 : ('iso8859_6', "Arabic_Algeria", "Arabic"),
+    0x1801 : ('iso8859_6', "Arabic_Morocco", "Arabic"),
+    0x1c01 : ('iso8859_6', "Arabic_Tunisia", "Arabic"),
+    0x2001 : ('iso8859_6', "Arabic_Oman", "Arabic"),
+    0x2401 : ('iso8859_6', "Arabic_Yemen", "Arabic"),
+    0x2801 : ('iso8859_6', "Arabic_Syria", "Arabic"),
+    0x2c01 : ('iso8859_6', "Arabic_Jordan", "Arabic"),
+    0x3001 : ('iso8859_6', "Arabic_Lebanon", "Arabic"),
+    0x3401 : ('iso8859_6', "Arabic_Kuwait", "Arabic"),
+    0x3801 : ('iso8859_6', "Arabic_UAE", "Arabic"),
+    0x3c01 : ('iso8859_6', "Arabic_Bahrain", "Arabic"),
+    0x4001 : ('iso8859_6', "Arabic_Qatar", "Arabic"),
+    0x042b : (None,        "Armenian","Armenian"),
+    0x042c : ('iso8859_9', "Azeri_Latin", "Turkish"),
+    0x082c : ('cp1251',    "Azeri_Cyrillic", "Cyrillic"),
+    0x042d : ('iso8859_1', "Basque", "Western Europe & US"),
+    0x0423 : ('cp1251',    "Belarusian", "Cyrillic"),
+    0x0402 : ('cp1251',    "Bulgarian", "Cyrillic"),
+    0x0403 : ('iso8859_1', "Catalan", "Western Europe & US"),
+    0x0404 : ('cp950',     "Chinese_Taiwan", "Traditional Chinese"),
+    0x0804 : ('cp936',     "Chinese_PRC", "Simplified Chinese"),
+    0x0c04 : ('cp950',     "Chinese_Hong_Kong", "Traditional Chinese"),
+    0x1004 : ('cp936',     "Chinese_Singapore", "Simplified Chinese"),
+    0x1404 : ('cp950',     "Chinese_Macau", "Traditional Chinese"),
+    0x041a : ('iso8859_2', "Croatian", "Central Europe"),
+    0x0405 : ('iso8859_2', "Czech", "Central Europe"),
+    0x0406 : ('iso8859_1', "Danish", "Western Europe & US"),
+    0x0413 : ('iso8859_1', "Dutch_Standard", "Western Europe & US"),
+    0x0813 : ('iso8859_1', "Dutch_Belgian", "Western Europe & US"),
+    0x0409 : ('iso8859_1', "English_United_States", "Western Europe & US"),
+    0x0809 : ('iso8859_1', "English_United_Kingdom", "Western Europe & US"),
+    0x0c09 : ('iso8859_1', "English_Australian", "Western Europe & US"),
+    0x1009 : ('iso8859_1', "English_Canadian", "Western Europe & US"),
+    0x1409 : ('iso8859_1', "English_New_Zealand", "Western Europe & US"),
+    0x1809 : ('iso8859_1', "English_Irish", "Western Europe & US"),
+    0x1c09 : ('iso8859_1', "English_South_Africa", "Western Europe & US"),
+    0x2009 : ('iso8859_1', "English_Jamaica", "Western Europe & US"),
+    0x2409 : ('iso8859_1', "English_Caribbean", "Western Europe & US"),
+    0x2809 : ('iso8859_1', "English_Belize", "Western Europe & US"),
+    0x2c09 : ('iso8859_1', "English_Trinidad", "Western Europe & US"),
+    0x3009 : ('iso8859_1', "English_Zimbabwe", "Western Europe & US"),
+    0x3409 : ('iso8859_1', "English_Philippines", "Western Europe & US"),
+    0x0425 : ('iso8859_13',"Estonian", "Baltic",),
+    0x0438 : ('iso8859_1', "Faeroese", "Western Europe & US"),
+    0x0429 : ('iso8859_6', "Farsi", "Arabic"),
+    0x040b : ('iso8859_1', "Finnish", "Western Europe & US"),
+    0x040c : ('iso8859_1', "French_Standard", "Western Europe & US"),
+    0x080c : ('iso8859_1', "French_Belgian", "Western Europe & US"),
+    0x0c0c : ('iso8859_1', "French_Canadian", "Western Europe & US"),
+    0x100c : ('iso8859_1', "French_Swiss", "Western Europe & US"),
+    0x140c : ('iso8859_1', "French_Luxembourg", "Western Europe & US"),
+    0x180c : ('iso8859_1', "French_Monaco", "Western Europe & US"),
+    0x0437 : (None,        "Georgian", "Georgian"),
+    0x0407 : ('iso8859_1', "German_Standard", "Western Europe & US"),
+    0x0807 : ('iso8859_1', "German_Swiss", "Western Europe & US"),
+    0x0c07 : ('iso8859_1', "German_Austrian", "Western Europe & US"),
+    0x1007 : ('iso8859_1', "German_Luxembourg", "Western Europe & US"),
+    0x1407 : ('iso8859_1', "German_Liechtenstein", "Western Europe & US"),
+    0x0408 : ('iso8859_7', "Greek", "Greek"),
+    0x040d : ('iso8859_8', "Hebrew", "Hebrew"),
+    0x0439 : (None,        "Hindi", "Indic"),
+    0x040e : ('iso8859_2', "Hungarian", "Central Europe"),
+    0x040f : ('iso8859_1', "Icelandic", "Western Europe & US"),
+    0x0421 : ('iso8859_1', "Indonesian", "Western Europe & US"),
+    0x0410 : ('iso8859_1', "Italian_Standard", "Western Europe & US"),
+    0x0810 : ('iso8859_1', "Italian_Swiss", "Western Europe & US"),
+    0x0411 : ('cp932',     "Japanese", "Japanese"),
+    0x043f : ('cp1251',    "Kazakh", "Cyrillic"),
+    0x0457 : (None,        "Konkani", "Indic"),
+    0x0412 : ('cp949',     "Korean", "Korean"),
+    0x0426 : ('iso8859_13',"Latvian", "Baltic",),
+    0x0427 : ('iso8859_13',"Lithuanian", "Baltic",),
+    0x042f : ('cp1251',    "Macedonian", "Cyrillic"),
+    0x043e : ('iso8859_1', "Malay_Malaysia", "Western Europe & US"),
+    0x083e : ('iso8859_1', "Malay_Brunei_Darussalam", "Western Europe & US"),
+    0x044e : (None,        "Marathi", "Indic"),
+    0x0414 : ('iso8859_1', "Norwegian_Bokmal", "Western Europe & US"),
+    0x0814 : ('iso8859_1', "Norwegian_Nynorsk", "Western Europe & US"),
+    0x0415 : ('iso8859_2', "Polish", "Central Europe"),
+    0x0416 : ('iso8859_1', "Portuguese_Brazilian", "Western Europe & US"),
+    0x0816 : ('iso8859_1', "Portuguese_Standard", "Western Europe & US"),
+    0x0418 : ('iso8859_2', "Romanian", "Central Europe"),
+    0x0419 : ('cp1251',    "Russian", "Cyrillic"),
+    0x044f : (None,        "Sanskrit", "Indic"),
+    0x081a : ('iso8859_2', "Serbian_Latin", "Central Europe"),
+    0x0c1a : ('cp1251',    "Serbian_Cyrillic", "Cyrillic"),
+    0x041b : ('iso8859_2', "Slovak", "Central Europe"),
+    0x0424 : ('iso8859_2', "Slovenian", "Central Europe"),
+    0x040a : ('iso8859_1', "Spanish_Trad_Sort", "Western Europe & US"),
+    0x080a : ('iso8859_1', "Spanish_Mexican", "Western Europe & US"),
+    0x0c0a : ('iso8859_1', "Spanish_Modern_Sort", "Western Europe & US"),
+    0x100a : ('iso8859_1', "Spanish_Guatemala", "Western Europe & US"),
+    0x140a : ('iso8859_1', "Spanish_Costa_Rica", "Western Europe & US"),
+    0x180a : ('iso8859_1', "Spanish_Panama", "Western Europe & US"),
+    0x1c0a : ('iso8859_1', "Spanish_Dominican_Repub", "Western Europe & US"),
+    0x200a : ('iso8859_1', "Spanish_Venezuela", "Western Europe & US"),
+    0x240a : ('iso8859_1', "Spanish_Colombia", "Western Europe & US"),
+    0x280a : ('iso8859_1', "Spanish_Peru", "Western Europe & US"),
+    0x2c0a : ('iso8859_1', "Spanish_Argentina", "Western Europe & US"),
+    0x300a : ('iso8859_1', "Spanish_Ecuador", "Western Europe & US"),
+    0x340a : ('iso8859_1', "Spanish_Chile", "Western Europe & US"),
+    0x380a : ('iso8859_1', "Spanish_Uruguay", "Western Europe & US"),
+    0x3c0a : ('iso8859_1', "Spanish_Paraguay", "Western Europe & US"),
+    0x400a : ('iso8859_1', "Spanish_Bolivia", "Western Europe & US"),
+    0x440a : ('iso8859_1', "Spanish_El_Salvador", "Western Europe & US"),
+    0x480a : ('iso8859_1', "Spanish_Honduras", "Western Europe & US"),
+    0x4c0a : ('iso8859_1', "Spanish_Nicaragua", "Western Europe & US"),
+    0x500a : ('iso8859_1', "Spanish_Puerto_Rico", "Western Europe & US"),
+    0x0441 : ('iso8859_1', "Swahili", "Western Europe & US"),
+    0x041d : ('iso8859_1', "Swedish", "Western Europe & US"),
+    0x081d : ('iso8859_1', "Swedish_Finland", "Western Europe & US"),
+    0x0449 : (None,        "Tamil", "Indic"),
+    0x0444 : ('cp1251',    "Tatar", "Cyrillic"),
+    0x041e : ('iso8859_11',"Thai", "Thai"),
+    0x041f : ('iso8859_9', "Turkish", "Turkish"),
+    0x0422 : ('cp1251',    "Ukrainian", "Cyrillic"),
+    0x0420 : ('iso8859_6', "Urdu", "Arabic"),
+    0x0443 : ('iso8859_9', "Uzbek_Latin", "Turkish"),
+    0x0843 : ('cp1251',    "Uzbek_Cyrillic", "Cyrillic"),
+    0x042a : (None,        "Vietnamese", "Vietnamese")
+}
+
+class CHMFile:
+    "A class to manage access to CHM files."
+    filename = ""
+    file = None
+    title = ""
+    home = "/"
+    index = None
+    topics = None
+    encoding = None
+    lcid = None
+    binaryindex = None
+
+    def __init__(self):
+        '''Creates a handle with no archive open; use LoadCHM to open one.'''
+        # Full-text search availability flag, reported by IsSearchable.
+        self.searchable = 0
+
+    def LoadCHM(self, archiveName):
+        '''Loads a CHM archive.
+        This function will also call GetArchiveInfo to obtain information
+        such as the index file name and the topics file. It returns 1 on
+        success, and 0 if it fails.
+        '''
+        if (self.filename != None):
+            self.CloseCHM()
+
+        self.file = chmlib.chm_open(archiveName)
+        if (self.file == None):
+            return 0
+
+        self.filename = archiveName
+        self.GetArchiveInfo()
+
+        return 1
+
+    def CloseCHM(self):
+        '''Closes the CHM archive.
+        This function will close the CHM file, if it is open. All variables
+        are also reset.
+        '''
+        if (self.filename != None):
+            chmlib.chm_close(self.file)
+            self.file = None
+            self.filename = ''
+            self.title = ""
+            self.home = "/"
+            self.index = None
+            self.topics = None
+            self.encoding = None
+
+    def GetArchiveInfo(self):
+        '''Obtains information on CHM archive.
+        This function checks the /#SYSTEM file inside the CHM archive to
+        obtain the index, home page, topics, encoding and title. It is called
+        from LoadCHM.
+        '''
+
+        #extra.is_searchable crashed...
+        #self.searchable = extra.is_searchable (self.file)
+        self.searchable = False
+        self.lcid = None
+
+        result, ui = chmlib.chm_resolve_object(self.file, '/#SYSTEM')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            sys.stderr.write('GetArchiveInfo: #SYSTEM does not exist\n')
+            return 0
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 4l, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetArchiveInfo: file size = 0\n')
+            return 0
+
+        buff = array.array('B', text)
+
+        index = 0
+        while (index < size):
+            cursor = buff[index] + (buff[index+1] * 256)
+
+            if (cursor == 0):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.topics = '/' + text[index:index+cursor-1]
+            elif (cursor == 1):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.index = '/' + text[index:index+cursor-1]
+            elif (cursor == 2):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.home = '/' + text[index:index+cursor-1]
+            elif (cursor == 3):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.title = text[index:index+cursor-1]
+            elif (cursor == 4):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.lcid = buff[index] + (buff[index+1] * 256)
+            elif (cursor == 6):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                tmp = text[index:index+cursor-1]
+                if not self.topics:
+                    tmp1 = '/' + tmp + '.hhc'
+                    tmp2 = '/' + tmp + '.hhk'
+                    res1, ui1 = chmlib.chm_resolve_object(self.file, tmp1)
+                    res2, ui2 = chmlib.chm_resolve_object(self.file, tmp2)
+                    if (not self.topics) and \
+                           (res1 == chmlib.CHM_RESOLVE_SUCCESS):
+                        self.topics = '/' + tmp + '.hhc'
+                    if (not self.index) and \
+                           (res2 == chmlib.CHM_RESOLVE_SUCCESS):
+                        self.index = '/' + tmp + '.hhk'
+            elif (cursor == 16):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.encoding = text[index:index+cursor-1]
+            else:
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+            index += cursor
+
+        self.GetWindowsInfo()
+
+        if not self.lcid:
+            self.lcid = extra.get_lcid (self.file)
+
+        return 1
+
+    def GetTopicsTree(self):
+        '''Reads and returns the topics tree.
+        This auxiliary function reads and returns the topics tree file
+        contents for the CHM archive.
+        '''
+        if (self.topics == None):
+            return None
+
+        if self.topics:
+            res, ui = chmlib.chm_resolve_object(self.file, self.topics)
+            if (res != chmlib.CHM_RESOLVE_SUCCESS):
+                return None
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetTopicsTree: file size = 0\n')
+            return None
+        return text
+
+    def GetIndex(self):
+        '''Reads and returns the index tree.
+        This auxiliary function reads and returns the index tree file
+        contents for the CHM archive.
+        '''
+        if (self.index == None):
+            return None
+
+        if self.index:
+            res, ui = chmlib.chm_resolve_object(self.file, self.index)
+            if (res != chmlib.CHM_RESOLVE_SUCCESS):
+                return None
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetIndex: file size = 0\n')
+            return None
+        return text
+
+    def ResolveObject(self, document):
+        '''Tries to locate a document in the archive.
+        This function tries to locate the document inside the archive. It
+        returns a tuple where the first element is zero if the function
+        was successful, and the second is the UnitInfo for that document.
+        The UnitInfo is used to retrieve the document contents
+        '''
+        if self.file:
+            #path = os.path.abspath(document)
+            path = document
+            return chmlib.chm_resolve_object(self.file, path)
+        else:
+            return (1, None)
+
+    def RetrieveObject(self, ui, start = -1, length = -1):
+        '''Retrieves the contents of a document.
+        This function takes a UnitInfo and two optional arguments, the first
+        being the start address and the second is the length. These define
+        the amount of data to be read from the archive.
+        '''
+        if self.file and ui:
+            if length == -1:
+                len = ui.length
+            else:
+                len = length
+            if start == -1:
+                st = 0l
+            else:
+                st = long(start)
+            return chmlib.chm_retrieve_object(self.file, ui, st, len)
+        else:
+            return (0, '')
+
+    def Search(self, text, wholewords=0, titleonly=0):
+        '''Performs full-text search on the archive.
+        The first parameter is the word to look for, the second
+        indicates if the search should be for whole words only, and
+        the third parameter indicates if the search should be
+        restricted to page titles.
+        This method will return a tuple, the first item
+        indicating if the search results were partial, and the second
+        item being a dictionary containing the results.'''
+        if text and text != '' and self.file:
+            return extra.search (self.file, text, wholewords,
+                                 titleonly)
+        else:
+            return None
+
+    def IsSearchable(self):
+        '''Indicates if the full-text search is available for this
+        archive - this flag is updated when GetArchiveInfo is called.
+
+        NOTE: GetArchiveInfo currently always sets the flag to False.'''
+        return self.searchable
+
+    def GetEncoding(self):
+        '''Returns a string that can be used with the codecs python package
+        to encode or decode the files in the chm archive. If an error is
+        found, or if it is not possible to find the encoding, None is
+        returned.'''
+        if self.encoding:
+            vals = string.split(self.encoding, ',')
+            if len(vals) > 2:
+                try:
+                    return charset_table[int(vals[2])]
+                except KeyError:
+                    pass
+        return None
+
+    def GetLCID(self):
+        '''Returns the archive Locale ID'''
+        if self.lcid in locale_table:
+            return locale_table[self.lcid]
+        else:
+            return None
+
+    def GetDWORD(self, buff, idx=0):
+        '''Internal method.
+        Reads a double word (4 bytes) from a buffer.
+        '''
+        result = buff[idx] + (buff[idx+1]<<8) + (buff[idx+2]<<16) + \
+                 (buff[idx+3]<<24)
+
+        if result == 0xFFFFFFFF:
+            result = 0
+
+        return result
+
+    def GetString(self, text, idx):
+        '''Internal method.
+        Retrieves a string from the #STRINGS buffer.
+        '''
+        next = string.find(text, '\x00', idx)
+        chunk = text[idx:next]
+        return chunk
+
+    def GetWindowsInfo(self):
+        '''Gets information from the #WINDOWS file.
+        Checks the #WINDOWS file to see if it has any info that was
+        not found in #SYSTEM (topics, index or default page.
+        '''
+        result, ui = chmlib.chm_resolve_object(self.file, '/#WINDOWS')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            return -1
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, 8)
+        if (size < 8):
+            return -2
+
+        buff = array.array('B', text)
+        num_entries = self.GetDWORD(buff, 0)
+        entry_size = self.GetDWORD(buff, 4)
+
+        if num_entries < 1:
+            return -3
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 8l, entry_size)
+        if (size < entry_size):
+            return -4
+
+        buff = array.array('B', text)
+        toc_index = self.GetDWORD(buff, 0x60)
+        idx_index = self.GetDWORD(buff, 0x64)
+        dft_index = self.GetDWORD(buff, 0x68)
+
+        result, ui = chmlib.chm_resolve_object(self.file, '/#STRINGS')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            return -5
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
+        if (size == 0):
+            return -6
+
+        if (not self.topics):
+            self.topics = self.GetString(text, toc_index)
+            if not self.topics.startswith("/"):
+                self.topics = "/" + self.topics
+
+        if (not self.index):
+            self.index = self.GetString(text, idx_index)
+            if not self.index.startswith("/"):
+                self.index = "/" + self.index
+
+        if (dft_index != 0):
+            self.home = self.GetString(text, dft_index)
+            if not self.home.startswith("/"):
+                self.home = "/" + self.home
--- a/src/calibre/utils/chm/chmlib.py
+++ b/src/calibre/utils/chm/chmlib.py
@ -0,0 +1,100 @@
+# This file was created automatically by SWIG.
+# Don't modify this file, modify the SWIG interface instead.
+# This file is compatible with both classic and new-style classes.
+
+from calibre.constants import plugins
+
+_chmlib, chmlib_err = plugins['chmlib']
+
+if chmlib_err:
+    raise RuntimeError('Failed to load chmlib: '+chmlib_err)
+
+def _swig_setattr(self,class_type,name,value):
+    if (name == "this"):
+        if isinstance(value, class_type):
+            self.__dict__[name] = value.this
+            if hasattr(value,"thisown"): self.__dict__["thisown"] = value.thisown
+            del value.thisown
+            return
+    method = class_type.__swig_setmethods__.get(name,None)
+    if method: return method(self,value)
+    self.__dict__[name] = value
+
+def _swig_getattr(self,class_type,name):
+    method = class_type.__swig_getmethods__.get(name,None)
+    if method: return method(self)
+    raise AttributeError,name
+
+import types
+try:
+    _object = types.ObjectType
+    _newclass = 1
+except AttributeError:
+    class _object : pass
+    _newclass = 0
+
+
+CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
+CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
+CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
+class chmUnitInfo(_object):
+    __swig_setmethods__ = {}
+    __setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
+    __swig_getmethods__ = {}
+    __getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)
+    __swig_setmethods__["start"] = _chmlib.chmUnitInfo_start_set
+    __swig_getmethods__["start"] = _chmlib.chmUnitInfo_start_get
+    if _newclass:start = property(_chmlib.chmUnitInfo_start_get,_chmlib.chmUnitInfo_start_set)
+    __swig_setmethods__["length"] = _chmlib.chmUnitInfo_length_set
+    __swig_getmethods__["length"] = _chmlib.chmUnitInfo_length_get
+    if _newclass:length = property(_chmlib.chmUnitInfo_length_get,_chmlib.chmUnitInfo_length_set)
+    __swig_setmethods__["space"] = _chmlib.chmUnitInfo_space_set
+    __swig_getmethods__["space"] = _chmlib.chmUnitInfo_space_get
+    if _newclass:space = property(_chmlib.chmUnitInfo_space_get,_chmlib.chmUnitInfo_space_set)
+    __swig_setmethods__["path"] = _chmlib.chmUnitInfo_path_set
+    __swig_getmethods__["path"] = _chmlib.chmUnitInfo_path_get
+    if _newclass:path = property(_chmlib.chmUnitInfo_path_get,_chmlib.chmUnitInfo_path_set)
+    def __init__(self,*args):
+        _swig_setattr(self, chmUnitInfo, 'this', apply(_chmlib.new_chmUnitInfo,args))
+        _swig_setattr(self, chmUnitInfo, 'thisown', 1)
+    def __del__(self, destroy= _chmlib.delete_chmUnitInfo):
+        try:
+            if self.thisown: destroy(self)
+        except: pass
+    def __repr__(self):
+        return "<C chmUnitInfo instance at %s>" % (self.this,)
+
+class chmUnitInfoPtr(chmUnitInfo):
+    def __init__(self,this):
+        _swig_setattr(self, chmUnitInfo, 'this', this)
+        if not hasattr(self,"thisown"): _swig_setattr(self, chmUnitInfo, 'thisown', 0)
+        _swig_setattr(self, chmUnitInfo,self.__class__,chmUnitInfo)
+_chmlib.chmUnitInfo_swigregister(chmUnitInfoPtr)
+
+chm_open = _chmlib.chm_open
+
+chm_close = _chmlib.chm_close
+
+CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
+chm_set_param = _chmlib.chm_set_param
+
+CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
+CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
+chm_resolve_object = _chmlib.chm_resolve_object
+
+chm_retrieve_object = _chmlib.chm_retrieve_object
+
+CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
+CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
+CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
+CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
+CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
+CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
+CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
+CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
+CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
+chm_enumerate = _chmlib.chm_enumerate
+
+chm_enumerate_dir = _chmlib.chm_enumerate_dir
+
+
--- a/src/calibre/utils/chm/extra.c
+++ b/src/calibre/utils/chm/extra.c
@ -0,0 +1,759 @@
+/*
+ * extra.c - full-text search support for pychm
+ *
+ * Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, 
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Rubens Ramos <rubensr@users.sourceforge.net>
+ *
+ * Heavily based on work done by:
+ * Pabs <pabs@zip.to> - chmdeco
+ * Razvan Cojocaru <razvanco@gmx.net> - xCHM
+ *
+ */
+
+#include "chm_lib.h"
+#ifdef __PYTHON__
+#include "Python.h"
+#else
+#include <stdio.h>
+#define PyObject void
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+#include "stdint.h"
+#define strcasecmp stricmp
+#define strncasecmp strnicmp
+#else
+#include <inttypes.h>
+#include <strings.h>
+#endif
+
+#if defined( _MSC_VER ) && !defined( __cplusplus )
+# define inline __inline
+#endif
+
+#if defined(_WIN32) || defined(__WIN32__)
+#       if defined(_MSC_VER)
+#               if defined(STATIC_LINKED)
+#                       define MODEXPORT(a) a
+#                       define MODIMPORT(a) extern a
+#               else
+#                       define MODEXPORT(a) __declspec(dllexport) a
+#                       define MODIMPORT(a) extern a
+#               endif
+#       else
+#               if defined(__BORLANDC__)
+#                       define MODEXPORT(a) a _export
+#                       define MODIMPORT(a) a _export
+#               else
+#                       define MODEXPORT(a) a
+#                       define MODIMPORT(a) a
+#               endif
+#       endif
+#else
+#       define MODEXPORT(a) a
+#       define MODIMPORT(a) a
+#endif
+
+#define false 0
+#define true 1
+
+#define FTS_HEADER_LEN 0x32
+#define TOPICS_ENTRY_LEN 16
+#define COMMON_BUF_LEN 1025
+
+#define FREE(x) free (x); x = NULL
+
+/* Decode the two bytes at b as a little-endian 16-bit value. */
+inline uint16_t 
+get_uint16 (uint8_t* b) {
+  int low_byte = b[0];
+  int high_byte = b[1];
+
+  return low_byte | (high_byte << 8);
+}
+
+/*
+ * Decode the four bytes at b as a little-endian 32-bit value.
+ * Each byte is widened to uint32_t before shifting: a plain "b[3]<<24" is
+ * evaluated in (signed) int and overflows when the top bit of b[3] is set.
+ */
+inline uint32_t 
+get_uint32 (uint8_t* b) {
+  return (uint32_t) b[0]      |
+    (uint32_t) b[1] << 8      |
+    (uint32_t) b[2] << 16     |
+    (uint32_t) b[3] << 24;
+}
+
+/*
+ * Decode the eight bytes at b as a little-endian 64-bit value.
+ * Every byte is widened to uint64_t before shifting.  Without the cast the
+ * low four bytes are combined as a signed int, so a set top bit in b[3]
+ * yields a negative value that sign-extends into the upper 32 bits.
+ */
+inline uint64_t 
+get_uint64 (uint8_t* b) {
+  return (uint64_t) b[0]  |
+    (uint64_t) b[1] << 8  |
+    (uint64_t) b[2] << 16 |
+    (uint64_t) b[3] << 24 |
+    (uint64_t) b[4] << 32 |
+    (uint64_t) b[5] << 40 |
+    (uint64_t) b[6] << 48 |
+    (uint64_t) b[7] << 56;
+}
+
+/*
+ * Decode a variable-length integer: each byte contributes its low 7 bits,
+ * the first byte being the least significant group, and a set top bit
+ * (0x80) means another byte follows.
+ *
+ * buffer - start of the encoded value; the caller must guarantee that a
+ *          byte with the top bit clear is reachable
+ * length - receives the number of bytes consumed
+ *
+ * The 7-bit group is widened to uint64_t before shifting (the shift used
+ * to be done in int, which loses or corrupts everything above bit 31).
+ * Groups that would land beyond bit 63 are consumed but ignored.
+ */
+inline uint64_t 
+be_encint (unsigned char *buffer, size_t *length)
+{
+  uint64_t result = 0;
+  int shift=0;
+  *length = 0;
+  
+  do {
+    if (shift < 64)
+      result |= (uint64_t)((*buffer) & 0x7f) << shift;
+    shift += 7;
+    *length = *length + 1;
+  
+  } while (*(buffer++) & 0x80);
+  
+  return result;
+}
+
+/*
+  Counts the run of set bits that starts at bit *bit of *byte (bit 7 is
+  visited first, then 6 ... 0, then bit 7 of the following byte) and then
+  steps past the unset bit that ends the run.
+
+  Returns the number of set bits in the run.  On return *bit is the next
+  bit position to read and *length is the number of whole bytes that were
+  stepped over.  The buffer size is not known here: the caller must
+  guarantee that an unset bit exists.
+*/
+inline int
+ffus (unsigned char* byte, int* bit, size_t *length) {
+  int run = 0;
+
+  *length = 0;
+
+  for (; *byte & (1 << *bit); ++run) {
+    if (*bit == 0) {
+      /* last bit of this byte: continue with the top bit of the next */
+      ++byte;
+      ++(*length);
+      *bit = 7;
+    } else {
+      --(*bit);
+    }
+  }
+
+  /* consume the terminating unset bit as well */
+  if (*bit == 0) {
+    ++(*length);
+    *bit = 7;
+  } else {
+    --(*bit);
+  }
+
+  return run;
+}
+
+
+/*
+ * Decode one bit-packed integer from a bit stream.
+ *
+ * byte   - byte holding the first bit of the value
+ * bit    - in/out: bit position inside *byte (7 is read first); updated to
+ *          the position following the decoded value
+ * s, r   - encoding parameters; only s == 2 is accepted, r is the minimum
+ *          number of payload bits
+ * length - receives the number of whole bytes stepped over
+ *
+ * Returns the decoded value, or all ones (~0) when bit is NULL, *bit > 7
+ * or s != 2.  The buffer size is not known here, so the caller must make
+ * sure enough bytes are available.
+ */
+inline uint64_t
+sr_int(unsigned char* byte, int* bit,
+       unsigned char s, unsigned char r, size_t *length)
+{
+  uint64_t ret;
+  unsigned char mask;
+  int n, n_bits, num_bits, base, count;
+  size_t fflen;
+
+  *length = 0;
+  
+  if(!bit || *bit > 7 || s != 2)
+    return ~(uint64_t)0;
+  ret = 0;
+  
+  /* The value starts with a run of set bits ended by one unset bit. */
+  count = ffus(byte, bit, &fflen);
+  *length += fflen;
+  byte += *length;
+  
+  /* Payload width: r bits, plus one extra bit per prefix bit after the first. */
+  n_bits = n = r + (count ? count-1 : 0) ;
+  
+  /* Collect the payload most significant bit first, one byte at a time. */
+  while (n > 0) {
+    /* num_bits is "bits taken from this byte minus one"; base is the
+       position of the lowest bit taken. */
+    num_bits = n > *bit ? *bit : n-1;
+    base = n > *bit ? 0 : *bit - (n-1);
+  
+    /* mask = the low (num_bits + 1) bits set */
+    switch (num_bits){
+    case 0:
+      mask = 1;
+      break;
+    case 1:
+      mask = 3;
+      break;
+    case 2:
+      mask = 7;
+      break;
+    case 3:
+      mask = 0xf;
+      break;
+    case 4:
+      mask = 0x1f;
+      break;
+    case 5:
+      mask = 0x3f;
+      break;
+    case 6:
+      mask = 0x7f;
+      break;
+    case 7:
+      mask = 0xff;
+      break;
+    default:
+      mask = 0xff;
+      break;
+    }
+  
+    mask <<= base;
+    ret = (ret << (num_bits+1)) |
+      (uint64_t)((*byte & mask) >> base);
+  
+    if( n > *bit ){
+      /* the rest of this byte was used up: move on to the next one */
+      ++byte;
+      ++(*length);
+      n -= *bit+1;
+      *bit = 7;
+    } else {
+      *bit -= n;
+      n = 0;
+    }
+  }
+  
+  /* A non-empty prefix contributes a 1 bit just above the payload bits. */
+  if(count)
+    ret |= (uint64_t)1 << n_bits;
+  
+  return ret;
+}
+
+            
+/*
+ * Walk the index nodes of the full-text search tree and return the offset
+ * of the leaf node that may contain `text`.
+ *
+ * chmfile        - open archive
+ * text           - word being searched (compared case-insensitively)
+ * initial_offset - offset of the root node inside the object `ui`
+ * buff_size      - size of one tree node in bytes
+ * tree_depth     - number of tree levels; tree_depth - 1 index levels are
+ *                  descended
+ * ui             - unit info of the object holding the tree
+ *
+ * Returns the leaf offset, or 0 when it cannot be determined (no matching
+ * entry, read failure, allocation failure or malformed node data).
+ *
+ * Index node layout as read here: a 16-bit free-space count followed by
+ * entries of { word_len, pos, word_len-1 characters, 32-bit child offset,
+ * 2 further bytes }.  `pos` is the number of leading characters shared
+ * with the previous word.
+ */
+inline uint32_t
+get_leaf_node_offset(struct chmFile *chmfile,
+                     const char *text,
+                     uint32_t initial_offset,
+                     uint32_t buff_size,
+                     uint16_t tree_depth,
+                     struct chmUnitInfo *ui)
+{
+  unsigned char word_len;
+  unsigned char pos;
+  uint16_t free_space;
+  uint32_t used;
+  uint32_t i;
+  uint32_t test_offset = 0;
+  char *word = NULL;
+  char *grown;
+  unsigned char *buffer;
+
+  /* tree_depth == 0 would wrap around in the pre-decrement below */
+  if (NULL == text || 0 == tree_depth || buff_size < sizeof(uint16_t))
+    return 0;
+
+  buffer = (unsigned char *)malloc (buff_size);
+  if (NULL == buffer)
+    return 0;
+  
+  while (--tree_depth) {
+    /* no progress since the previous level: give up */
+    if (initial_offset == test_offset)
+      goto fail;
+    
+    test_offset = initial_offset;
+    if (chm_retrieve_object (chmfile, ui, buffer, 
+                             initial_offset, buff_size) == 0)
+      goto fail;
+    
+    free_space = get_uint16 (buffer);
+    if (free_space > buff_size)
+      goto fail;                /* corrupt node: would wrap below */
+    used = buff_size - free_space;
+
+    /* every node is scanned from its own first entry */
+    i = sizeof(uint16_t);
+    
+    while (i < used) {
+      if (used - i < 2)
+        goto fail;
+
+      word_len = *(buffer + i);
+      pos = *(buffer + i + 1);
+
+      /* word_len == 0 would turn "word_len - 1" into a huge copy size;
+         the entry must also hold the characters and the child offset */
+      if (word_len == 0 ||
+          used - i < (uint32_t)word_len + 1 + sizeof(uint32_t))
+        goto fail;
+
+      /* the shared prefix cannot be longer than the previous word */
+      if (pos > (word ? strlen (word) : 0))
+        goto fail;
+
+      /* keep the first `pos` characters, append the new suffix */
+      grown = (char*)realloc (word, (size_t)pos + word_len);
+      if (NULL == grown)
+        goto fail;
+      word = grown;
+      memcpy (word + pos, buffer + i + 2, word_len - 1);
+      word[pos + word_len - 1] = 0;
+      
+      if (strcasecmp (text, word) <= 0) {
+        initial_offset = get_uint32 (buffer + i + word_len + 1);
+        break;
+      }
+      
+      i += word_len + sizeof (unsigned char) + sizeof(uint32_t) + 
+        sizeof(uint16_t);
+    }
+  }
+  
+  if(initial_offset == test_offset)
+    initial_offset = 0;
+
+  free (word);
+  free (buffer);
+
+  return initial_offset;
+
+ fail:
+  free (word);
+  free (buffer);
+  return 0;
+}
+
+/*
+ * Process the word-location-code (WLC) block of one indexed word: for each
+ * of the wlc_count documents, look up its title and URL and store
+ * title -> URL in `dict` (Python build) or print the pair (stand-alone
+ * build).
+ *
+ * wlc_count / wlc_size / wlc_offset - number of documents, byte size and
+ *        offset of the WLC block inside `uimain`
+ * ds,dr / cs,cr / ls,lr - parameters passed to sr_int for the document
+ *        index, the location-code count and the location codes
+ * uimain, uitbl, uistrings, topics, urlstr - unit infos of the objects the
+ *        data is read from
+ * dict - Python dictionary receiving the results (unused without
+ *        __PYTHON__)
+ *
+ * Returns true on success and false on any read/allocation failure or when
+ * the WLC data ends prematurely.  If inserting into `dict` fails, false is
+ * returned with the Python exception left set.
+ */
+inline int 
+pychm_process_wlc (struct chmFile *chmfile,
+                   uint64_t wlc_count, uint64_t wlc_size,
+                   uint32_t wlc_offset, unsigned char ds,
+                   unsigned char dr, unsigned char cs,
+                   unsigned char cr, unsigned char ls,
+                   unsigned char lr, struct chmUnitInfo *uimain,
+                   struct chmUnitInfo* uitbl,
+                   struct chmUnitInfo *uistrings,
+                   struct chmUnitInfo* topics,
+                   struct chmUnitInfo *urlstr,
+                   PyObject *dict)
+{
+  uint32_t stroff, urloff;
+  uint64_t i, j, count;
+  size_t length;
+  int wlc_bit = 7;
+  size_t off = 0;
+  uint64_t index = 0;
+  LONGINT64 got;
+  int ok = false;
+  unsigned char entry[TOPICS_ENTRY_LEN];
+  unsigned char combuf[COMMON_BUF_LEN];
+  unsigned char *buffer;
+  char *url = NULL;
+  char *topic = NULL;
+#ifdef __PYTHON__
+  PyObject *value;
+#endif
+
+  /* reject empty blocks and sizes that do not fit into size_t */
+  if (wlc_size == 0 || wlc_size != (uint64_t)(size_t)wlc_size)
+    return false;
+
+  buffer = (unsigned char *)malloc ((size_t)wlc_size);
+  if (NULL == buffer)
+    return false;
+
+  if (chm_retrieve_object(chmfile, uimain, buffer, 
+                          wlc_offset, wlc_size) == 0)
+    goto done;
+
+  for (i = 0; i < wlc_count; ++i) {
+    
+    /* every document record starts on a byte boundary */
+    if(wlc_bit != 7) {
+      ++off;
+      wlc_bit = 7;
+    }
+
+    /* sr_int cannot see the buffer size: never start a read past the end */
+    if (off >= wlc_size)
+      goto done;
+
+    /* document indices are stored as deltas from the previous one */
+    index += sr_int(buffer + off, &wlc_bit, ds, dr, &length);
+    off += length;
+
+    if(chm_retrieve_object(chmfile, topics, entry, 
+                           index * TOPICS_ENTRY_LEN,
+                           TOPICS_ENTRY_LEN) < TOPICS_ENTRY_LEN)
+      goto done;
+
+    stroff = get_uint32 (entry + 4);
+
+    free (topic);
+    topic = NULL;
+    got = chm_retrieve_object (chmfile, uistrings, combuf, 
+                               stroff, COMMON_BUF_LEN - 1);
+    if (got <= 0) {
+      topic = strdup ("Untitled in index");
+    } else {
+      /* terminate at the bytes actually read so that no stale data from
+         an earlier read can leak into the string */
+      if (got > COMMON_BUF_LEN - 1)
+        got = COMMON_BUF_LEN - 1;
+      combuf[got] = 0;
+      topic = strdup ((char*)combuf);
+    }
+        
+    urloff = get_uint32 (entry + 8);
+
+    /* the URL string offset is read from bytes 8..11 of this record */
+    if(chm_retrieve_object (chmfile, uitbl, combuf, 
+                            urloff, 12) < 12)
+      goto done;
+
+    urloff = get_uint32 (combuf + 8);
+    
+    got = chm_retrieve_object (chmfile, urlstr, combuf, 
+                               urloff + 8, COMMON_BUF_LEN - 1);
+    if (got <= 0)
+      goto done;
+    if (got > COMMON_BUF_LEN - 1)
+      got = COMMON_BUF_LEN - 1;
+    combuf[got] = 0;
+
+    free (url);
+    url = strdup ((char*)combuf);
+
+    if (url && topic) {
+#ifdef __PYTHON__
+      /* PyDict_SetItemString does not steal the reference to value */
+      value = PyString_FromString (url);
+      if (value == NULL ||
+          PyDict_SetItemString (dict, topic, value) != 0) {
+        Py_XDECREF (value);
+        goto done;
+      }
+      Py_DECREF (value);
+#else
+      printf ("%s ==> %s\n", url, topic);
+#endif
+    }
+        
+    if (off >= wlc_size)
+      goto done;
+    count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
+    off += length;
+    
+    /* the location codes themselves are not needed, only skipped */
+    for (j = 0; j < count; ++j) {
+      if (off >= wlc_size)
+        goto done;
+      sr_int (buffer + off, &wlc_bit, ls, lr, &length);
+      off += length;
+    }
+  }
+
+  ok = true;
+
+ done:
+  free (topic);
+  free (url);
+  free (buffer);
+
+  return ok;
+}
+
+/*
+ * Full-text search for `text` in the archive's $FIftiMain index.
+ *
+ * chmfile     - open archive
+ * text        - word (whole_words != 0) or word prefix to look for
+ * whole_words - non-zero: only an exact, case-insensitive match counts
+ * titles_only - non-zero: skip words whose title flag is not set
+ * dict        - receives title -> URL pairs (see pychm_process_wlc)
+ *
+ * Returns -1 when text is NULL and false (0) when the archive has no
+ * usable index or a read fails.  Otherwise, for whole-word searches the
+ * result of pychm_process_wlc for the matching word (false if none), and
+ * for prefix searches true when at least one word matched.
+ *
+ * Leaf node layout as read here: 32-bit offset of the next leaf, 2 bytes,
+ * a 16-bit free-space count, then entries of { word_len, pos, word_len-1
+ * characters, title flag, encoded WLC count, 32-bit WLC offset, 2 bytes,
+ * encoded WLC size }.
+ */
+int 
+chm_search (struct chmFile *chmfile,
+            const char *text, int whole_words, 
+            int titles_only, PyObject *dict)
+{
+  unsigned char header[FTS_HEADER_LEN];
+  unsigned char doc_index_s;
+  unsigned char doc_index_r;
+  unsigned char code_count_s;
+  unsigned char code_count_r;
+  unsigned char loc_codes_s;
+  unsigned char loc_codes_r;
+  unsigned char word_len, pos;
+  unsigned char *buffer;
+  char *word = NULL;
+  char *grown;
+  uint32_t node_offset;
+  uint32_t next_offset;
+  uint32_t node_len;
+  uint32_t limit;
+  uint16_t tree_depth;
+  uint32_t i;
+  uint16_t free_space;
+  uint64_t wlc_count, wlc_size;
+  uint32_t wlc_offset;
+  unsigned char title;
+  size_t encsz;
+  size_t text_len;
+  struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
+  int partial = false;
+
+  if (NULL == text)
+    return -1;
+
+  if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
+      CHM_RESOLVE_SUCCESS || 
+      chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
+      CHM_RESOLVE_SUCCESS)
+    return false;
+
+  /* every header field used below must really have been read */
+  if(chm_retrieve_object(chmfile, &ui, header, 0, FTS_HEADER_LEN) <
+     FTS_HEADER_LEN)
+    return false;
+  
+  doc_index_s = header[0x1E];
+  doc_index_r = header[0x1F];
+  code_count_s = header[0x20];
+  code_count_r = header[0x21];
+  loc_codes_s = header[0x22];
+  loc_codes_r = header[0x23];
+
+  /* sr_int only understands s == 2 */
+  if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
+    return false;
+  }
+
+  node_offset = get_uint32 (header + 0x14);
+  node_len = get_uint32 (header + 0x2e);
+  tree_depth = get_uint16 (header + 0x18);
+
+  /* a leaf needs its 8 header bytes; the upper bound keeps node_len + 1
+     from overflowing */
+  if (node_len < 8 || node_len == 0xFFFFFFFFu)
+    return false;
+
+  node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
+                                      tree_depth, &ui);
+  if (!node_offset)
+    return false;
+
+  /* one extra zero byte guarantees that be_encint stops inside the buffer */
+  buffer = (unsigned char*)calloc (1, (size_t)node_len + 1);
+  if (NULL == buffer)
+    return false;
+
+  text_len = strlen (text);
+  
+  do {
+    
+    if (chm_retrieve_object (chmfile, &ui, buffer, 
+                             node_offset, node_len) == 0) {
+      partial = false;
+      goto done;
+    }
+    
+    next_offset = get_uint32 (buffer);
+    free_space = get_uint16 (buffer + 6);
+    if (free_space > node_len)
+      goto done;                /* corrupt node: would wrap below */
+    limit = node_len - free_space;
+    
+    i = sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
+    
+    encsz = 0;
+
+    while (i < limit) {
+      if (limit - i < 2)
+        goto done;
+
+      word_len = *(buffer + i);
+      pos = *(buffer + i + 1);
+
+      /* word_len == 0 would turn "word_len - 1" into a huge copy size;
+         the characters and the title flag must lie inside the node */
+      if (word_len == 0 || limit - i < (uint32_t)word_len + 2)
+        goto done;
+
+      /* the shared prefix cannot be longer than the previous word */
+      if (pos > (word ? strlen (word) : 0))
+        goto done;
+
+      /* keep the first `pos` characters, append the new suffix */
+      grown = (char*)realloc (word, (size_t)pos + word_len);
+      if (NULL == grown)
+        goto done;
+      word = grown;
+      memcpy (word + pos, buffer + i + 2, word_len - 1);
+      word[pos + word_len - 1] = 0;
+      
+      i += 2 + word_len;
+      title = *(buffer + i - 1);
+
+      if (i >= limit)
+        goto done;
+      wlc_count = be_encint (buffer + i, &encsz);
+      i += (uint32_t) encsz;
+      
+      /* 32-bit offset, 2 more bytes and at least one byte of WLC size */
+      if (i > limit ||
+          limit - i < sizeof(uint32_t) + sizeof(uint16_t) + 1)
+        goto done;
+
+      wlc_offset = get_uint32 (buffer + i);
+
+      i += sizeof(uint32_t) + sizeof(uint16_t);
+      wlc_size =  be_encint (buffer + i, &encsz);
+      i += (uint32_t) encsz;
+
+      if (!title && titles_only)
+        continue;
+      
+      if (whole_words && !strcasecmp(text, word)) {
+        partial = pychm_process_wlc (chmfile, wlc_count, wlc_size, 
+                                     wlc_offset, doc_index_s, 
+                                     doc_index_r,code_count_s, 
+                                     code_count_r, loc_codes_s, 
+                                     loc_codes_r, &ui, &uiurltbl,
+                                     &uistrings, &uitopics,
+                                     &uiurlstr, dict);
+        goto done;
+      }
+      
+      if (!whole_words) {
+        if (!strncasecmp (word, text, text_len)) {
+          partial = true;
+          pychm_process_wlc (chmfile, wlc_count, wlc_size, 
+                             wlc_offset, doc_index_s, 
+                             doc_index_r,code_count_s, 
+                             code_count_r, loc_codes_s, 
+                             loc_codes_r, &ui, &uiurltbl,
+                             &uistrings, &uitopics,
+                             &uiurlstr, dict);
+          
+        /* NOTE(review): "< -1" (rather than "< 0") is kept as found */
+        } else if (strncasecmp (text, word, text_len) < -1)
+          break;
+      }
+
+    }
+
+    /* a leaf that names itself as its successor would loop forever */
+    if (next_offset == node_offset)
+      break;
+    node_offset = next_offset;
+
+    /* word is still NULL when no entry has been read at all */
+  } while (!whole_words && 
+           word != NULL &&
+           !strncmp (word, text, text_len) && 
+           node_offset);
+
+ done:
+  free (word);
+  free (buffer);
+
+  return partial;
+}
+
+/* One place where a locale id (LCID) may be stored inside the archive. */
+typedef struct {
+  const char *file;   /* path of the object inside the archive */
+  int offset;         /* byte offset of the 32-bit LCID inside that object */
+} Langrec;
+
+/*
+ * Candidate locations, tried in this order by chm_get_lcid.
+ * NOTE(review): only the first path starts with '/'; confirm that
+ * chm_resolve_object can resolve the other two as written.
+ */
+Langrec lang_files[] = {
+  {"/$FIftiMain",               0x7E},
+  {"$WWKeywordLinks/BTree",     0x34},
+  {"$WWAssociativeLinks/BTree", 0x34}
+};
+
+/* Number of entries in lang_files. */
+#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(Langrec))
+
+/*
+ * Return the locale id (LCID) stored in the archive, or -1 when none of
+ * the locations listed in lang_files can be read.
+ *
+ * The four bytes are fetched into a byte buffer and decoded with
+ * get_uint32, like the other fields read from the archive, so the result
+ * does not depend on the host byte order.  A short read is treated as
+ * "not found" instead of returning partly uninitialised data.
+ */
+int
+chm_get_lcid (struct chmFile *chmfile) {
+  struct chmUnitInfo ui;
+  unsigned char raw[sizeof(uint32_t)];
+  size_t i;
+
+  for (i=0; i<LANG_FILES_SIZE; i++) {
+  
+    if (chm_resolve_object (chmfile, lang_files[i].file, &ui) != 
+        CHM_RESOLVE_SUCCESS)
+      continue;
+
+    if (chm_retrieve_object (chmfile, &ui, raw, 
+                             lang_files[i].offset, sizeof(raw)) ==
+        (LONGINT64) sizeof(raw))
+      return (int) get_uint32 (raw);
+  }
+
+  return -1;
+}
+
+#ifdef __PYTHON__
+
+/*
+ * Python: is_searchable(chmfile) -> int
+ *
+ * Returns 1 when every object needed for full-text search ($FIftiMain,
+ * #TOPICS, #STRINGS, #URLTBL, #URLSTR) can be resolved, 0 otherwise.
+ * Raises TypeError when the argument is not a usable chmfile handle.
+ */
+static PyObject *
+is_searchable (PyObject *self, PyObject *args) {
+  static const char *required[] = {
+    "/$FIftiMain", "/#TOPICS", "/#STRINGS", "/#URLTBL", "/#URLSTR"
+  };
+  struct chmFile *file;
+  PyObject *obj0;
+  struct chmUnitInfo ui;
+  size_t k;
+
+  if (!PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
+    PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
+    return NULL;
+  }
+
+  file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
+
+  /* a NULL handle must not reach chm_resolve_object */
+  if (file == NULL) {
+    if (!PyErr_Occurred())
+      PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
+    return NULL;
+  }
+
+  for (k = 0; k < sizeof(required)/sizeof(required[0]); k++) {
+    if (chm_resolve_object (file, required[k], &ui) !=
+        CHM_RESOLVE_SUCCESS)
+      return Py_BuildValue ("i", 0);
+  }
+
+  return Py_BuildValue ("i", 1);
+}
+
+/*
+ * Python: search(chmfile, text, whole_words, titles_only) -> (int, dict)
+ *
+ * Runs chm_search and returns its result code together with a dictionary
+ * mapping topic titles to URLs.  Raises TypeError for bad arguments and
+ * MemoryError when the dictionary cannot be created.
+ */
+static PyObject *
+search (PyObject *self, PyObject *args) {
+  char *text;
+  int whole_words;
+  int titles_only;
+  int partial;
+  struct chmFile *file;
+  PyObject *obj0;
+  PyObject *dict;
+
+  if (!PyArg_ParseTuple (args, "Osii:search", &obj0, &text, 
+                         &whole_words, &titles_only)) {
+    PyErr_SetString(PyExc_TypeError,
+                    "Expected chmfile (not CHMFile!), string, int, int");
+    return NULL;
+  }
+
+  file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
+
+  /* a NULL handle must not reach chm_search */
+  if (file == NULL) {
+    if (!PyErr_Occurred())
+      PyErr_SetString(PyExc_TypeError,
+                      "Expected chmfile (not CHMFile!), string, int, int");
+    return NULL;
+  }
+
+  dict = PyDict_New();
+  if (dict == NULL) {
+    PyErr_NoMemory();
+    return NULL;
+  }
+
+  partial = chm_search (file, 
+                        text, whole_words, titles_only, dict);
+
+  /* do not return a value while an exception is pending */
+  if (PyErr_Occurred()) {
+    Py_DECREF(dict);
+    return NULL;
+  }
+
+  /* "N" hands our reference to the tuple; "O" would add a second one
+     and the dictionary would never be freed */
+  return Py_BuildValue ("(iN)", partial, dict);
+}
+
+/*
+ * Python: get_lcid(chmfile) -> int or None
+ *
+ * Returns the archive's locale id, or None when chm_get_lcid reports -1.
+ * Raises TypeError when the argument is not a usable chmfile handle.
+ */
+static PyObject *
+get_lcid (PyObject *self, PyObject *args) {
+  int code;
+  struct chmFile *file;
+  PyObject *obj0;
+
+  if (!PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
+    PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
+    return NULL;
+  }
+
+  file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
+
+  /* a NULL handle must not reach chm_get_lcid */
+  if (file == NULL) {
+    if (!PyErr_Occurred())
+      PyErr_SetString(PyExc_TypeError,
+                      "Expected a chmfile (not a CHMFile!)");
+    return NULL;
+  }
+
+  code = chm_get_lcid (file);
+
+  if (code != -1)
+    return Py_BuildValue ("i", code);
+
+  Py_INCREF(Py_None);
+  return Py_None;
+}
+
+/* Method table of the chm_extra module; the all-NULL entry ends the list. */
+static PyMethodDef
+IndexMethods[] = {
+  {"get_lcid", get_lcid, METH_VARARGS, 
+   "Returns LCID (Locale ID) for archive."},
+  {"search", search, METH_VARARGS, 
+   "Perform Full-Text search."},
+  {"is_searchable", is_searchable, METH_VARARGS, 
+   "Return 1 if it is possible to search the archive, 0 otherwise."},
+  {NULL, NULL, 0, NULL}
+};
+
+/*
+ * Module initialisation function: registers the functions listed in
+ * IndexMethods under the module name "chm_extra".
+ */
+#ifdef __cplusplus
+extern "C"
+#endif
+MODEXPORT(void)
+initchm_extra (void) {
+  Py_InitModule ("chm_extra", IndexMethods);
+}
+
+#else
+
+/*
+ * Stand-alone test driver (built when __PYTHON__ is not defined).
+ *
+ * Usage: <program> <filename>
+ * Prints the archive's LCID, then repeatedly reads
+ * "<whole_words> <titles_only> <string>" from standard input and runs
+ * chm_search.  The loop ends at end of input or on a line that cannot be
+ * parsed.  Returns -1 when the archive cannot be opened, 0 otherwise.
+ */
+int
+main (int argc, char **argv) {
+  struct chmFile *file;
+  char text[255];
+  int whole_words, titles_only;
+  int partial;
+  int lcid;
+
+  if (argc != 2) {
+    printf ("\n%s <filename>\n", argv[0]);
+    return 0;
+  }
+
+  file = chm_open (argv[1]);
+  if (!file)
+    return -1;
+
+  lcid = chm_get_lcid (file);
+  printf ("\nLCID= %d (%08X)\n", lcid, (unsigned int) lcid);
+
+  for (;;) {
+    printf ("\n<whole_words> <titles_only> <string>\n");
+    printf ("> ");
+
+    /* scanf returns EOF (-1, i.e. "true") at end of input, so require all
+       three conversions; %254s keeps the word inside text[255] */
+    if (scanf ("%d %d %254s", &whole_words, &titles_only, text) != 3)
+      break;
+
+    partial = chm_search (file, 
+                          text, whole_words, titles_only, NULL);
+    printf ("Partial = %d\n", partial);
+  }
+
+  chm_close (file);
+  return 0;
+}
+
+#endif
--- a/src/calibre/utils/chm/stdint.h
+++ b/src/calibre/utils/chm/stdint.h
@ -0,0 +1,247 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006-2008 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+#  include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 doesn't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+   typedef signed char       int8_t;
+   typedef signed short      int16_t;
+   typedef signed int        int32_t;
+   typedef unsigned char     uint8_t;
+   typedef unsigned short    uint16_t;
+   typedef unsigned int      uint32_t;
+#else
+   typedef signed __int8     int8_t;
+   typedef signed __int16    int16_t;
+   typedef signed __int32    int32_t;
+   typedef unsigned __int8   uint8_t;
+   typedef unsigned __int16  uint16_t;
+   typedef unsigned __int32  uint32_t;
+#endif
+typedef signed __int64       int64_t;
+typedef unsigned __int64     uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef signed __int64    intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef _W64 signed int   intptr_t;
+   typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
--- a/src/calibre/utils/chm/swig_chm.c
+++ b/src/calibre/utils/chm/swig_chm.c
--- a/src/calibre/utils/chm/swig_chm.i
+++ b/src/calibre/utils/chm/swig_chm.i
@ -0,0 +1,214 @@
+%module chmlib
+%include "typemaps.i"
+%include "cstring.i"
+
+%{
+/*
+ Copyright (C) 2003 Rubens Ramos <rubensr@users.sourceforge.net>
+
+ Based on code by:
+ Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
+
+ pychm is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public
+ License along with this program; see the file COPYING.  If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA
+
+ $Id: swig_chm.i,v 1.1.1.1 2003/12/02 12:38:14 rubensr Exp $
+*/
+#include "chm_lib.h"
+#include <stdio.h>
+
+static PyObject *my_callback = NULL;
+
+/*
+ * Remember `arg` as the Python callable invoked by dummy_enumerator.
+ * Returns None on success; raises TypeError and returns NULL when `arg`
+ * is not callable.
+ */
+static PyObject *
+my_set_callback(PyObject *dummy, PyObject *arg)
+{
+    if (PyCallable_Check(arg)) {
+      /* take the new reference before dropping the old one */
+      Py_XINCREF(arg);
+      Py_XDECREF(my_callback);
+      my_callback = arg;
+
+      Py_INCREF(Py_None);
+      return Py_None;
+    }
+
+    PyErr_SetString(PyExc_TypeError, "parameter must be callable");
+    return NULL;
+}
+
+/*
+ * C-side enumerator handed to chm_enumerate / chm_enumerate_dir.  It
+ * forwards every call to the Python callable stored by my_set_callback as
+ * callback(chmfile, unit_info, context).
+ *
+ * context is the PyCObject built by the "void *context" typemap; the
+ * Python object it wraps is passed on to the callback.
+ *
+ * Returns 1 when the callback returned normally and 0 when the arguments
+ * could not be built or the callback raised.
+ */
+int dummy_enumerator (struct chmFile *h, 
+                      struct chmUnitInfo *ui, 
+                      void *context) {
+    PyObject *arglist = NULL;
+    PyObject *result = NULL;
+    PyObject *py_h;
+    PyObject *py_ui;
+    PyObject *py_c;
+    int status = 0;
+
+    py_h  = SWIG_NewPointerObj((void *) h, SWIGTYPE_p_chmFile, 0);
+    py_ui = SWIG_NewPointerObj((void *) ui, SWIGTYPE_p_chmUnitInfo, 0);
+    py_c  = (PyObject *) PyCObject_AsVoidPtr(context);
+
+    if (py_h && py_ui && py_c)
+      arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
+
+    /* Time to call the callback */
+    if (arglist) {
+      result = PyEval_CallObject(my_callback, arglist);
+      Py_DECREF(arglist);
+
+      /* result is NULL when the callback raised: test before releasing */
+      if (result != NULL) {
+        status = 1;
+        Py_DECREF(result);
+      }
+    }
+
+    /* the wrappers are released on every path, not only after a call */
+    Py_XDECREF(py_h);
+    Py_XDECREF(py_ui);
+
+    return status;
+}
+%}
+
+/* A CHM_ENUMERATOR argument takes a Python callable: it is stored through
+   my_set_callback and the C library is given dummy_enumerator instead. */
+%typemap(in) CHM_ENUMERATOR {
+  if (!my_set_callback(self, $input)) goto fail;
+  $1 = dummy_enumerator;
+}
+
+/* The context argument is wrapped in a PyCObject; dummy_enumerator unwraps
+   it again before calling the Python callback. */
+%typemap(in) void *context {
+  if (!($1 = PyCObject_FromVoidPtr($input, NULL))) goto fail;
+}
+
+/* OutValue takes no Python argument (numinputs=0): a zero-filled
+   chmUnitInfo is allocated for the C function to fill in. */
+%typemap(in, numinputs=0) struct chmUnitInfo *OutValue (struct chmUnitInfo *temp = (struct chmUnitInfo *) calloc(1, sizeof(struct chmUnitInfo))) {
+  $1 = temp;
+}
+
+/* Return the filled-in chmUnitInfo: it becomes the result when there is
+   none yet (or it is None), otherwise it is appended to the result tuple. */
+%typemap(argout) struct chmUnitInfo *OutValue {
+  PyObject *o, *o2, *o3;
+  o = SWIG_NewPointerObj((void *) $1, SWIGTYPE_p_chmUnitInfo, 1);
+  if ((!$result) || ($result == Py_None)) {
+    $result = o;
+  } else {
+    if (!PyTuple_Check($result)) {
+      PyObject *o2 = $result;
+      $result = PyTuple_New(1);
+      PyTuple_SetItem($result,0,o2);
+    }
+    o3 = PyTuple_New(1);
+    PyTuple_SetItem(o3,0,o);
+    o2 = $result;
+    $result = PySequence_Concat(o2,o3);
+    Py_DECREF(o2);
+    Py_DECREF(o3);
+  }
+}
+
+/* Allocate the output buffer of an "unsigned char *OUTPUT" argument.
+   NOTE(review): arg5 is presumably the wrapper's local for the fifth
+   argument (len of chm_retrieve_object) - confirm in the generated code. */
+%typemap(check) unsigned char *OUTPUT {
+  /* nasty hack */
+#ifdef __cplusplus
+   $1 = ($1_ltype) new char[arg5];
+#else
+   $1 = ($1_ltype) malloc(arg5);
+#endif
+   if ($1 == NULL) SWIG_fail;
+}
+
+/* Turn the output buffer into a Python string of arg5 bytes, add it to the
+   result and release the buffer. */
+%typemap(argout,fragment="t_output_helper") unsigned char *OUTPUT {
+   PyObject *o;
+   o = PyString_FromStringAndSize($1, arg5);
+   $result = t_output_helper($result,o);
+#ifdef __cplusplus
+   delete [] $1;
+#else
+   free($1);
+#endif
+}
+
+#ifdef WIN32
+typedef unsigned __int64 LONGUINT64;
+typedef __int64          LONGINT64;
+#else
+typedef unsigned long long LONGUINT64;
+typedef long long          LONGINT64;
+#endif
+
+/* the two available spaces in a CHM file                      */
+/* N.B.: The format supports arbitrarily many spaces, but only */
+/*       two appear to be used at present.                     */
+#define CHM_UNCOMPRESSED (0)
+#define CHM_COMPRESSED   (1)
+
+/* structure representing an ITS (CHM) file stream             */
+struct chmFile;
+
+/* structure representing an element from an ITS file stream   */
+#define CHM_MAX_PATHLEN  256
+struct chmUnitInfo
+{
+    LONGUINT64         start;
+    LONGUINT64         length;
+    int                space;
+    char               path[CHM_MAX_PATHLEN+1];
+};
+
+/* open an ITS archive */
+struct chmFile* chm_open(const char *filename);
+
+/* close an ITS archive */
+void chm_close(struct chmFile *h);
+
+/* methods for setting tuning parameters for a particular file */
+#define CHM_PARAM_MAX_BLOCKS_CACHED 0
+void chm_set_param(struct chmFile *h,
+                   int paramType,
+                   int paramVal);
+
+/* resolve a particular object from the archive */
+#define CHM_RESOLVE_SUCCESS (0)
+#define CHM_RESOLVE_FAILURE (1)
+int chm_resolve_object(struct chmFile *h,
+                       const char *objPath,
+                       struct chmUnitInfo *OutValue);
+
+/* retrieve part of an object from the archive */
+LONGINT64 chm_retrieve_object(struct chmFile *h,
+                              struct chmUnitInfo *ui,
+                              unsigned char *OUTPUT,
+                              LONGUINT64 addr,
+                              LONGINT64 len);
+
+/* enumerate the objects in the .chm archive */
+typedef int (*CHM_ENUMERATOR)(struct chmFile *h,
+                              struct chmUnitInfo *ui,
+                              void *context);
+#define CHM_ENUMERATE_NORMAL    (1)
+#define CHM_ENUMERATE_META      (2)
+#define CHM_ENUMERATE_SPECIAL   (4)
+#define CHM_ENUMERATE_FILES     (8)
+#define CHM_ENUMERATE_DIRS      (16)
+#define CHM_ENUMERATE_ALL       (31)
+#define CHM_ENUMERATOR_FAILURE  (0)
+#define CHM_ENUMERATOR_CONTINUE (1)
+#define CHM_ENUMERATOR_SUCCESS  (2)
+int chm_enumerate(struct chmFile *h,
+                  int what,
+                  CHM_ENUMERATOR e,
+                  void *context);
+
+int chm_enumerate_dir(struct chmFile *h,
+                      const char *prefix,
+                      int what,
+                      CHM_ENUMERATOR e,
+                      void *context);
--- a/src/calibre/utils/help2man.py
+++ b/src/calibre/utils/help2man.py
@ -4,6 +4,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import time, bz2
+from calibre.constants import isfreebsd

 from calibre.constants import __version__, __appname__, __author__

@ -57,6 +58,9 @@ def create_man_page(prog, parser):
    lines = [x if isinstance(x, unicode) else unicode(x, 'utf-8', 'replace') for
            x in lines]

-    return  bz2.compress((u'\n'.join(lines)).encode('utf-8'))
+    if not isfreebsd:
+        return  bz2.compress((u'\n'.join(lines)).encode('utf-8'))
+    else:
+        return  (u'\n'.join(lines)).encode('utf-8')