merging with trunk

commit 07e888f764
Author: James Ralston
Date:   2010-03-05 20:18:22 -08:00

170 changed files with 61702 additions and 19059 deletions

View File

@@ -4,6 +4,151 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.

+- version: 0.6.44
+  date: 2010-03-05
+
+  new features:
+    - title: "Experimental support for conversion of CHM files"
+      type: major
+      description: >
+          "Conversion and reading of metadata from CHM files is now supported. This feature is
+          still experimental, with more testing needed. Building from source on linux now
+          requires chmlib."
+
+    - title: "Experimental support for fetching annotations from the Kindle"
+      type: major
+      description: >
+          "calibre can now fetch annotations from your Kindle and put them into the
+          comments field. To fetch annotations, click the arrow next to the
+          'send to device' button and select 'Fetch Annotations', with your Kindle
+          connected."
+
+    - title: "Support FreeBSD out of the box (except USB)"
+      type: major
+      tickets: [4715]
+
+    - title: "News download scheduler: Don't try to download news when no active internet connection is present (linux/windows only)"
+
+    - title: "EPUB to EPUB conversion: Preserve font encryption"
+
+    - title: "calibre-server: Add --pidfile and --daemonize (unix only) options"
+
+    - title: "Plugins: When loading a plugin zip file that contains binary code (pyd/dll/so/dylib), extract it to a temporary directory and add that directory to sys.path, instead of just adding the zip file to the path, as python cannot load compiled code from a zip file (see the sketch after this changelog)"
+
+  bug fixes:
+    - title: "Ebook-viewer: Handle non-ascii CSS files when doing font substitutions"
+
+    - title: "Conversion pipeline: Ignore non-integral play orders when parsing NCX files"
+
+    - title: "When decoding NCX toc files, if no encoding is declared and detection has less than 100% confidence, assume UTF-8."
+      tickets: [5039]
+
+    - title: "PML chapter definitions missing from toc.ncx"
+      tickets: [4990]
+
+    - title: "Unicode string for cover causes calibredb --output-format stanza to fail"
+      tickets: [5035]
+
+    - title: "Search cover:False fails, cover:True succeeds"
+      tickets: [5034]
+
+    - title: "Plugins: correctly use context"
+
+    - title: "MOBI Input: Don't lose cover if it is also referred to in main text"
+      tickets: [5020]
+
+    - title: "RTF Output: Don't choke on PNG images"
+
+  new recipes:
+    - title: Journal of Hospital Medicine, San Francisco Bay Guardian, Smithsonian Magazine
+      author: Krittika Goyal
+
+    - title: Astronomy Picture of the Day, Epicurious
+      author: Starson17
+
+    - title: Diario Vasco, Various Chilean newspapers
+      author: Darko Miletic
+
+    - title: Kukuburi
+      author: Mori
+
+  improved recipes:
+    - Ars Technica
+    - Fudzilla
+    - The Atlantic
+    - The Economist
+    - Huffington Post
+
+- version: 0.6.43
+  date: 2010-02-26
+
+  new features:
+    - title: "Support for the Teclast K3 and Elonex e-book readers"
+
+    - title: "Add 'Recently Read' category to catalog if Kindle is connected when catalog is generated"
+
+    - title: "When adding PRC/MOBI files that are actually Topaz files, change detected file type to Topaz"
+
+    - title: "MOBI Output: If the SVG rasterizer is not available, continue anyway"
+
+    - title: "News download: When using the debug pipeline options, create a zip file named periodical.downloaded_recipe in the debug directory. This can be passed to ebook-convert to directly convert a previous download into an e-book."
+
+    - title: "Add Apply button to catalog generation dialog"
+
+  bug fixes:
+    - title: "When fetching metadata in the edit metadata dialog, use a python thread instead of a Qt thread. Hopefully this will fix the reports of crashes when fetching metadata"
+
+    - title: "Refresh cover browser when a cover is changed via the edit meta information dialog"
+
+    - title: "More device detection debug output on OS X"
+
+    - title: "Download only covers should not also set social metadata"
+      tickets: [4966]
+
+    - title: "Content server: If binding to 0.0.0.0 fails, try detecting and binding only to the interface used for outgoing traffic"
+
+    - title: "Handle poorly designed import plugins that return None on error"
+
+    - title: "Move logic for removing inline navbars out of the BasicNewsRecipe class"
+
+    - title: "MOBI metadata: When setting the title, set it in both the PalmDoc and EXTH headers"
+
+    - title: "MOBI metadata: Do not try to extract embedded metadata from MOBI files larger than 4MB"
+
+    - title: "Handle PDB files that contain PDF files"
+      tickets: [4971]
+
+    - title: "PML Input: Various fixes"
+      tickets: [4959, 4961]
+
+    - title: "Fix reading MOBI metadata from files in zip/rar archives"
+
+    - title: "Make extracting single files from RAR archives more efficient"
+
+    - title: "No longer need Qt to generate the default cover for news downloads"
+
+    - title: "Catalog generation: fix for EPUB anchors beginning with numbers in Recently Added"
+
+    - title: "Searching: Handle uppercase keywords correctly"
+      tickets: [4951]
+
+  new recipes:
+    - title: Gamasutra
+      author: Darko Miletic
+
+  improved recipes:
+    - "Strategy+Business"
+    - Arizona Daily Star
+    - Heise
+    - New Scientist
+    - Various Serbian news feeds
+    - Houston and San Francisco Chronicles
+
 - version: 0.6.42
   date: 2010-02-20
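The plugin-loading change noted in 0.6.44 above boils down to a simple rule: Python's zip importer cannot execute compiled extensions, so any plugin zip shipping them must be unpacked first. A minimal sketch of that logic, with a hypothetical helper name (this is not calibre's actual API):

    import sys, tempfile, zipfile

    # Extensions that Python's zip importer cannot load directly.
    BINARY_EXTS = ('.pyd', '.dll', '.so', '.dylib')

    def add_plugin_to_path(zip_path):
        zf = zipfile.ZipFile(zip_path)
        try:
            has_binary = any(name.lower().endswith(BINARY_EXTS)
                             for name in zf.namelist())
            if has_binary:
                # Compiled code: extract to a temporary directory and put
                # that directory on sys.path instead of the zip itself.
                tdir = tempfile.mkdtemp(prefix='plugin_')
                zf.extractall(tdir)
                sys.path.insert(0, tdir)
            else:
                # Pure-python plugin: the zip can go on sys.path directly.
                sys.path.insert(0, zip_path)
        finally:
            zf.close()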

View File

@@ -79,9 +79,24 @@ p.unread_book {
     text-indent:-2em;
 }

+p.date_read {
+    text-align:left;
+    margin-top:0px;
+    margin-bottom:0px;
+    margin-left:6em;
+    text-indent:-6em;
+}
+
 hr.series_divider {
     width:50%;
     margin-left:1em;
     margin-top:0em;
     margin-bottom:0em;
 }
+
+hr.annotations_divider {
+    width:50%;
+    margin-left:1em;
+    margin-top:0em;
+    margin-bottom:0em;
+}

Binary file not shown. (Before: 116 KiB; After: 124 KiB)

Binary file not shown. (After: 766 B)

Binary file not shown. (After: 956 B)

Binary file not shown. (After: 956 B)

View File

@@ -1,7 +1,6 @@
-#!/usr/bin/env python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 24sata.rs
@@ -9,7 +8,6 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Ser24Sata(BasicNewsRecipe):
     title = '24 Sata - Sr'
@@ -17,22 +15,20 @@ class Ser24Sata(BasicNewsRecipe):
     description           = '24 sata portal vesti iz Srbije'
     publisher             = 'Ringier d.o.o.'
     category              = 'news, politics, entertainment, Serbia'
-    oldest_article        = 7
+    oldest_article        = 2
     max_articles_per_feed = 100
     no_stylesheets        = True
     encoding              = 'utf-8'
     use_embedded_content  = False
     language              = 'sr'
-    lang                  = 'sr-Latn-RS'
     extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

     conversion_options = {
                           'comment'          : description
                         , 'tags'             : category
                         , 'publisher'        : publisher
-                        , 'language'         : lang
-                        , 'pretty_print'     : True
+                        , 'language'         : language
+                        , 'linearize_tables' : True
                          }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -40,25 +36,6 @@ class Ser24Sata(BasicNewsRecipe):
     feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]

     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-        attribs = [  'style','font','valign'
-                    ,'colspan','width','height'
-                    ,'rowspan','summary','align'
-                    ,'cellspacing','cellpadding'
-                    ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                    del item[attrib]
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
         return self.adeify_images(soup)

     def print_version(self, url):

View File

@@ -0,0 +1,37 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class APOD(BasicNewsRecipe):
+    title                = u'Astronomy Picture of the Day'
+    __author__           = 'Starson17'
+    description          = 'Astronomy Pictures'
+    language             = 'en'
+    use_embedded_content = False
+    no_stylesheets       = True
+    cover_url            = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
+    remove_javascript    = True
+    recursions           = 0
+    oldest_article       = 14
+
+    feeds = [
+             (u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
+            ]
+
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+        '''
+
+    def postprocess_html(self, soup, first_fetch):
+        center_tags = soup.findAll(['center'])
+        p_tags = soup.findAll(['p'])
+        last_center = center_tags[-1:]
+        last_center[0].extract()
+        first_p = p_tags[:1]
+        for tag in first_p:
+            tag.extract()
+        last2_p = p_tags[-2:]
+        for tag in last2_p:
+            tag.extract()
+        return soup

View File

@@ -5,6 +5,7 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 arstechnica.com
 '''

+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
@@ -20,7 +21,7 @@ class ArsTechnica2(BasicNewsRecipe):
     no_stylesheets       = True
     encoding             = 'utf-8'
     use_embedded_content = False
-    extra_css            = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
+    extra_css            = ' body {font-family: Arial,Helvetica,sans-serif} .title{text-align: left} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '

     conversion_options = {
                           'comments' : description
@@ -30,6 +31,10 @@ class ArsTechnica2(BasicNewsRecipe):
                          }

+    preprocess_regexps = [
+                           (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
+                          ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
+                         ]
+
     keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
@@ -37,7 +42,7 @@ class ArsTechnica2(BasicNewsRecipe):
                     dict(name=['object','link','embed'])
                    ,dict(name='div', attrs={'class':'read-more-link'})
                   ]
+    remove_attributes = ['width','height']

     feeds = [
              (u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
@@ -90,3 +95,5 @@ class ArsTechnica2(BasicNewsRecipe):
         return soup

+    def get_article_url(self, article):
+        return article.get('guid', None).rpartition('?')[0]
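The get_article_url override added at the end of the Ars Technica recipe strips the tracking query string that the feed appends to each GUID. The string mechanics are plain Python; note that rpartition returns an empty first element when no '?' is present, so this implicitly assumes every GUID carries a query string (the URL below is illustrative):

    guid = 'http://arstechnica.com/apple/news/2010/03/story.ars?comments=1'
    # rpartition splits on the *last* '?', keeping everything before it.
    url = guid.rpartition('?')[0]
    assert url == 'http://arstechnica.com/apple/news/2010/03/story.ars'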

View File

@@ -5,76 +5,103 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 theatlantic.com
 '''
-import re
+import string

 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString

 class TheAtlantic(BasicNewsRecipe):

     title       = 'The Atlantic'
     __author__  = 'Kovid Goyal and Sujata Raman'
     description = 'Current affairs and politics focussed on the US'
-    INDEX       = 'http://www.theatlantic.com/doc/current'
+    INDEX       = 'http://www.theatlantic.com/magazine/toc/0/'
     language    = 'en'

-    remove_tags_before = dict(name='div', id='storytop')
-    remove_tags = [
-        dict(name='div', id=['seealso','storybottom', 'footer', 'ad_banner_top', 'sidebar','articletoolstop','subcontent',]),
-        dict(name='p', attrs={'id':["pagination"]}),
-        dict(name='table',attrs={'class':"tools"}),
-        dict(name='style'),
-        dict(name='a', href='/a/newsletters.mhtml')
-    ]
-    remove_attributes = ['icap', 'callout', 'style']
-    no_stylesheets = True
-    conversion_options = { 'linearize_tables':True }
-    extra_css = '''
-        #timestamp{font-family:Arial,Helvetica,sans-serif; color:#666666 ;font-size:x-small}
-        #storytype{font-family:Arial,Helvetica,sans-serif; color:#D52B1E ;font-weight:bold; font-size:x-small}
-        h2{font-family:georgia,serif; font-style:italic;font-size:x-small;font-weight:normal;}
-        h1{font-family:georgia,serif; font-weight:bold; font-size:large}
-        #byline{font-family:georgia,serif; font-weight:bold; font-size:x-small}
-        #topgraf{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
-        .artsans{{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
-        '''
+    remove_tags_before = dict(name='div', id='articleHead')
+    remove_tags_after  = dict(id='copyright')
+    remove_tags        = [dict(id=['header', 'printAds', 'pageControls'])]
+    no_stylesheets     = True
+
+    def print_version(self, url):
+        return url.replace('/archive/', '/print/')

     def parse_index(self):
         articles = []

         soup = self.index_to_soup(self.INDEX)
-        sectit = soup.find('h1', attrs={'class':'sectionTitle'})
-        if sectit is not None:
-            texts = sectit.findAll('cufontext')
-            texts = map(self.tag_to_string, texts[-2:])
-            self.timefmt = ' [%s]'%(''.join(texts))
+
+        issue = soup.find('span', attrs={'class':'issue'})
+        if issue:
+            self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')

-        cover = soup.find('img', alt=re.compile('Cover'), src=True)
+        cover = soup.find('img', src=True, attrs={'class':'cover'})
         if cover is not None:
-            self.cover_url = 'http://theatlantic.com'+cover['src']
+            self.cover_url = cover['src']

-        for item in soup.findAll('div', attrs={'class':'item'}):
-            a = item.find('a')
-            if a and a.has_key('href'):
-                url = a['href']
-                if not url.startswith('http://'):
-                    url = 'http://www.theatlantic.com/'+url
-                url = url.replace('/doc/', '/doc/print/')
-                title = self.tag_to_string(a)
-                if title in ('VIDEO', 'AUDIO', 'INTERACTIVE MAP', 'SIDEBAR', 'RECIPES'):
-                    continue
-                title = title.replace('&AMP;', '&')
-                byline = item.find(attrs={'class':'byline'})
-                date = self.tag_to_string(byline) if byline else ''
-                description = ''
-                self.log('\tFound article:', title)
-                self.log('\t\t', url)
-                articles.append({
-                    'title':title,
-                    'date':date,
-                    'url':url,
-                    'description':description
-                })
-
-        return [('Current Issue', articles)]
+        feeds = []
+        for section in soup.findAll('div', attrs={'class':'magazineSection'}):
+            section_title = section.find(attrs={'class':'sectionHeader'})
+            section_title = string.capwords(self.tag_to_string(section_title))
+            self.log('Found section:', section_title)
+            articles = []
+            for post in section.findAll('div', attrs={'class':'post'}):
+                h = post.find(['h3', 'h4'])
+                title = self.tag_to_string(h)
+                a = post.find('a', href=True)
+                url = a['href']
+                if url.startswith('/'):
+                    url = 'http://www.theatlantic.com'+url
+                p = post.find('p', attrs={'class':'dek'})
+                desc = None
+                self.log('\tFound article:', title, 'at', url)
+                if p is not None:
+                    desc = self.tag_to_string(p)
+                    self.log('\t\t', desc)
+                articles.append({'title':title, 'url':url, 'description':desc,
+                                 'date':''})
+            feeds.append((section_title, articles))
+
+        poems = []
+        self.log('Found section: Poems')
+        for poem in soup.findAll('div', attrs={'class':'poem'}):
+            title = self.tag_to_string(poem.find('h4'))
+            desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
+            url = 'http://www.theatlantic.com'+poem.find('a')['href']
+            self.log('\tFound article:', title, 'at', url)
+            self.log('\t\t', desc)
+            poems.append({'title':title, 'url':url, 'description':desc,
+                          'date':''})
+        if poems:
+            feeds.append(('Poems', poems))
+
+        self.log('Found section: Advice')
+        div = soup.find(id='advice')
+        title = self.tag_to_string(div.find('h4'))
+        url = 'http://www.theatlantic.com'+div.find('a')['href']
+        desc = self.tag_to_string(div.find('p'))
+        self.log('\tFound article:', title, 'at', url)
+        self.log('\t\t', desc)
+        feeds.append(('Advice', [{'title':title, 'url':url, 'description':desc,
+                                  'date':''}]))
+
+        return feeds
+
+    def postprocess_html(self, soup, first):
+        for table in soup.findAll('table', align='right'):
+            img = table.find('img')
+            if img is not None:
+                img.extract()
+                caption = self.tag_to_string(table).strip()
+                div = Tag(soup, 'div')
+                div['style'] = 'text-align:center'
+                div.insert(0, img)
+                div.insert(1, Tag(soup, 'br'))
+                if caption:
+                    div.insert(2, NavigableString(caption))
+                table.replaceWith(div)
+        return soup
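Both the old and new versions of the Atlantic recipe hinge on parse_index, which must return a list of (section title, list of article dicts) tuples; each dict needs at least 'title' and 'url', while 'description' and 'date' may be empty. A stripped-down sketch of that contract with placeholder data (not real output of the recipe):

    def parse_index(self):
        # Placeholder data illustrating the return shape calibre expects.
        return [
            ('Features', [
                {'title': 'Example article',
                 'url': 'http://www.theatlantic.com/example',
                 'description': 'One-line teaser', 'date': ''},
            ]),
        ]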

View File

@@ -1,10 +1,10 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
-www.azstarnet.com
+azstarnet.com
 '''

+import urllib
 from calibre.web.feeds.news import BasicNewsRecipe

 class Azstarnet(BasicNewsRecipe):
@@ -14,12 +14,12 @@ class Azstarnet(BasicNewsRecipe):
     language              = 'en'
     publisher             = 'azstarnet.com'
     category              = 'news, politics, Arizona, USA'
-    delay                 = 1
     oldest_article        = 3
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
     encoding              = 'utf-8'
+    masthead_url          = 'http://azstarnet.com/content/tncms/live/global/resources/images/logo.gif'
     needs_subscription    = True

     conversion_options = {
@@ -32,31 +32,27 @@ class Azstarnet(BasicNewsRecipe):

     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
+        br.open('http://azstarnet.com/')
         if self.username is not None and self.password is not None:
-            br.open('http://azstarnet.com/registration/retro.php')
-            br.select_form(nr=1)
-            br['email'] = self.username
-            br['pass' ] = self.password
-            br.submit()
+            data = urllib.urlencode({ 'm':'login'
+                                     ,'u':self.username
+                                     ,'p':self.password
+                                     ,'z':'http://azstarnet.com/'
+                                    })
+            br.open('http://azstarnet.com/app/registration/proxy.php',data)
         return br

-    remove_tags = [dict(name=['object','link','iframe','base','img'])]
+    keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]
+
+    remove_tags = [
+                    dict(name=['object','link','iframe','base','img'])
+                   ,dict(name='div',attrs={'class':'bannerinstory'})
+                  ]

     feeds = [
-              (u'Tucson Region' , u'http://rss.azstarnet.com/index.php?site=metro')
-             ,(u'Sports'        , u'http://rss.azstarnet.com/index.php?site=sports')
-             ,(u'Business'      , u'http://rss.azstarnet.com/index.php?site=biz-topheadlines')
-             ,(u'Nation-World'  , u'http://rss.azstarnet.com/index.php?site=news')
-             ,(u'Opinion'       , u'http://rss.azstarnet.com/index.php?site=opinion')
-             ,(u'Lifestyle'     , u'http://rss.azstarnet.com/index.php?site=accent')
-             ,(u'Food'          , u'http://rss.azstarnet.com/index.php?site=food')
+              (u'Local News'    , u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc')
+             ,(u'National News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc')
+             ,(u'World News'    , u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc')
+             ,(u'Sports'        , u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc')
+             ,(u'Opinion'       , u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc')
+             ,(u'Movies'        , u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc')
+             ,(u'Food'          , u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
             ]

     def preprocess_html(self, soup):
@@ -64,4 +60,6 @@ class Azstarnet(BasicNewsRecipe):
             del item['style']
         return soup

+    def print_version(self, url):
+        return url + '?print=1'
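The rewritten get_browser above replaces form-based login with a direct POST to a login endpoint. A sketch of the general mechanics, using mechanize (whose Browser is what calibre's get_browser() wraps), with the endpoint and field names taken from the diff above; treat all the details as site-specific assumptions:

    import urllib
    import mechanize

    br = mechanize.Browser()
    br.open('http://azstarnet.com/')
    data = urllib.urlencode({'m': 'login',
                             'u': 'username',
                             'p': 'password',
                             'z': 'http://azstarnet.com/'})
    # Passing a data argument makes mechanize issue a POST instead of a GET.
    br.open('http://azstarnet.com/app/registration/proxy.php', data)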

View File

@@ -1,7 +1,6 @@
-#!/usr/bin/env python
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 b92.net
 '''
@@ -19,16 +18,15 @@ class B92(BasicNewsRecipe):
     no_stylesheets       = True
     use_embedded_content = False
     encoding             = 'cp1250'
     language             = 'sr'
-    lang                 = 'sr-Latn-RS'
-    extra_css            = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    extra_css            = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} '

     conversion_options = {
                           'comment'          : description
                         , 'tags'             : category
                         , 'publisher'        : publisher
-                        , 'language'         : lang
+                        , 'language'         : language
+                        , 'linearize_tables' : True
                          }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -50,20 +48,5 @@ class B92(BasicNewsRecipe):
         return url + '&version=print'

     def preprocess_html(self, soup):
-        del soup.body['onload']
-        for item in soup.findAll('font'):
-            item.name='div'
-            if item.has_key('size'):
-                del item['size']
-        attribs = [  'style','font','valign'
-                    ,'colspan','width','height'
-                    ,'rowspan','summary','align'
-                    ,'cellspacing','cellpadding'
-                    ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                    del item[attrib]
-        return soup
+        return self.adeify_images(soup)

View File

@@ -1,13 +1,11 @@
-#!/usr/bin/env python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 beta.rs
 '''

 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class Danas(BasicNewsRecipe):
     title = 'BETA'
@@ -19,18 +17,14 @@ class Danas(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets        = False
     use_embedded_content  = True
     language              = 'sr'
-    lang                  = 'sr-Latn-RS'
-    direction             = 'ltr'
-    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '

     conversion_options = {
                           'comment'      : description
                         , 'tags'         : category
                         , 'publisher'    : publisher
-                        , 'language'     : lang
-                        , 'pretty_print' : True
+                        , 'language'     : language
                          }
@@ -43,9 +37,4 @@ class Danas(BasicNewsRecipe):
                   ]

     def preprocess_html(self, soup):
-        soup.html['lang'] = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
-        soup.head.insert(0,mlang)
-        soup.head.insert(1,mcharset)
         return self.adeify_images(soup)

View File

@@ -14,14 +14,13 @@ class Blic(BasicNewsRecipe):
     description           = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
     publisher             = 'RINGIER d.o.o.'
     category              = 'news, politics, Serbia'
-    delay                 = 1
     oldest_article        = 2
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
+    masthead_url          = 'http://www.blic.rs/resources/images/header/header_back.png'
     language              = 'sr'
-    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} '

     conversion_options = {
                           'comment' : description
@@ -31,13 +30,15 @@ class Blic(BasicNewsRecipe):
                          }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

     remove_tags_before = dict(name='div', attrs={'id':'article_info'})
+    remove_tags        = [dict(name=['object','link'])]
+    remove_attributes  = ['width','height']

     feeds = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]
-    remove_tags = [dict(name=['object','link'])]

     def print_version(self, url):
         return url + '/print'

+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

View File

@@ -0,0 +1,36 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+chetnixploitation.blogspot.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Chetnixploitation(BasicNewsRecipe):
+    title                 = 'Chetnixploitation'
+    __author__            = 'Darko Miletic'
+    description           = 'Filmski blog'
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    language              = 'sr'
+    encoding              = 'utf-8'
+    no_stylesheets        = True
+    use_embedded_content  = True
+    extra_css             = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : 'film, blog, cetnici, srbija, ex-yu'
+                        , 'publisher' : 'Son of Man'
+                        , 'language'  : language
+                         }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)

View File

@@ -20,7 +20,7 @@ class Danas(BasicNewsRecipe):
     encoding     = 'utf-8'
     masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
     language     = 'sr'
-    extra_css    = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
+    extra_css    = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '

     conversion_options = {
                           'comment' : description
@@ -38,7 +38,7 @@ class Danas(BasicNewsRecipe):
                    ,dict(name=['object','link','iframe'])
                   ]

     feeds = [
               (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27')
              ,(u'Hronika'  , u'http://www.danas.rs/rss/rss.asp?column_id=2' )
              ,(u'Drustvo'  , u'http://www.danas.rs/rss/rss.asp?column_id=24')
@@ -60,4 +60,4 @@ class Danas(BasicNewsRecipe):

     def print_version(self, url):
         return url + '&action=print'

View File

@@ -0,0 +1,50 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.diariovasco.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DiarioVasco(BasicNewsRecipe):
+    title                 = 'Diario Vasco'
+    __author__            = 'Darko Miletic'
+    description           = 'Noticias de pais Vasco y el resto del mundo'
+    publisher             = 'Diario Vasco'
+    category              = 'news, politics, Spain'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'es'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://www.diariovasco.com/img/rd.logotipo2_dvasco.gif'
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                         }
+
+    keep_only_tags = [
+                       dict(attrs={'id':'title'})
+                      ,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
+                     ]
+    remove_tags       = [dict(name='ul')]
+    remove_attributes = ['width','height']
+
+    feeds = [
+              (u'Ultimas Noticias' , u'http://www.diariovasco.com/rss/feeds/ultima.xml'       )
+             ,(u'Portada'          , u'http://www.diariovasco.com/portada.xml'                )
+             ,(u'Politica'         , u'http://www.diariovasco.com/rss/feeds/politica.xml'     )
+             ,(u'Deportes'         , u'http://www.diariovasco.com/rss/feeds/deportes.xml'     )
+             ,(u'Economia'         , u'http://www.diariovasco.com/rss/feeds/economia.xml'     )
+             ,(u'Mundo'            , u'http://www.diariovasco.com/rss/feeds/mundo.xml'        )
+             ,(u'Cultura'          , u'http://www.diariovasco.com/rss/feeds/cultura.xml'      )
+             ,(u'Gente'            , u'http://www.diariovasco.com/rss/feeds/gente.xml'        )
+             ,(u'Contraportada'    , u'http://www.diariovasco.com/rss/feeds/contraportada.xml')
+            ]
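Recipes like Diario Vasco need no custom code at all: keep_only_tags, remove_tags and remove_attributes do the pruning declaratively. Each entry is a matcher in BeautifulSoup's findAll() style; roughly, calibre keeps only the subtrees matching keep_only_tags, then deletes matches of remove_tags and strips the listed attributes. Illustrative matchers, copied from the recipe above:

    keep_only_tags = [
        dict(attrs={'id': 'title'}),                     # match on attributes only
        dict(attrs={'class': ['headline', 'subhead']}),  # any one of several classes
    ]
    remove_tags = [dict(name='ul')]                      # match on tag name
    remove_attributes = ['width', 'height']              # stripped from every tag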

View File

@@ -1,7 +1,5 @@
-#!/usr/bin/env python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 e-novine.com
@@ -9,7 +7,6 @@ e-novine.com
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class E_novine(BasicNewsRecipe):
     title = 'E-Novine'
@@ -20,40 +17,38 @@ class E_novine(BasicNewsRecipe):
     oldest_article        = 2
     max_articles_per_feed = 100
     no_stylesheets        = True
-    encoding              = 'cp1250'
+    encoding              = 'utf-8'
     use_embedded_content  = False
     language              = 'sr'
-    lang                  = 'sr'
-    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    masthead_url          = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'
+    extra_css             = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} '

     conversion_options = {
                           'comment'      : description
                         , 'tags'         : category
                         , 'publisher'    : publisher
-                        , 'language'     : lang
-                        , 'pretty_print' : True
+                        , 'language'     : language
                          }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
+    keep_only_tags = [
+                       dict(name='div', attrs={'class':'article_head'})
+                      ,dict(name='div', attrs={'id':'article_body'})
+                     ]

-    remove_tags = [dict(name=['object','link','embed','iframe'])]
+    remove_tags = [
+                    dict(name=['object','link','embed','iframe'])
+                   ,dict(attrs={'id':'box_article_tools'})
+                  ]
+    remove_attributes = ['height','width','lang']

-    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml')]
+    feeds = [(u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss')]

     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang'] = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
         for item in soup.findAll(style=True):
             del item['style']
-        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
-        if ftag:
-            it = ftag.div
-            it.extract()
-            ftag.div.extract()
-            ftag.insert(0,it)
-        return soup
+        return self.adeify_images(soup)
+
+    def print_version(self, url):
+        return url + '?print'

View File

@@ -119,6 +119,8 @@ class Economist(BasicNewsRecipe):
                 ns = NavigableString(self.tag_to_string(caption))
                 div.insert(0, ns)
                 div.insert(1, Tag(soup, 'br'))
+                del img['width']
+                del img['height']
                 img.extract()
                 div.insert(2, img)
                 table.replaceWith(div)

View File

@@ -123,6 +123,8 @@ class Economist(BasicNewsRecipe):
                 div.insert(0, ns)
                 div.insert(1, Tag(soup, 'br'))
                 img.extract()
+                del img['width']
+                del img['height']
                 div.insert(2, img)
                 table.replaceWith(div)
         return soup

View File

@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+__license__   = 'GPL v3'
+__copyright__ = '2010, Starson17'
+'''
+www.epicurious.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Epicurious(BasicNewsRecipe):
+    title                 = u'Epicurious'
+    __author__            = 'Starson17'
+    description           = 'Food and Recipes from Epicurious'
+    cover_url             = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
+    publisher             = 'Epicurious'
+    tags                  = 'news, food, gourmet, recipes'
+    language              = 'en'
+    use_embedded_content  = False
+    no_stylesheets        = True
+    remove_javascript     = True
+    recursions            = 3
+    oldest_article        = 14
+    max_articles_per_feed = 20
+
+    keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
+                      dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
+                     ]
+
+    remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
+                   {'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
+                   dict(name='div', attrs={'class':['tagged','comments']})
+                  ]
+
+    remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]
+
+    feeds = [
+             (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
+             (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
+             (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
+             (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
+            ]
+
+    match_regexps = [
+        r'http://www.epicurious.com/.*recipes/.*/views'
+        ]
+
+    preprocess_regexps = [
+        (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
+        (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
+        (re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
+        ]
+
+    def postprocess_html(self, soup, first_fetch):
+        for t in soup.findAll(['table', 'tr', 'td']):
+            t.name = 'div'
+        return soup
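The preprocess_regexps lists used by Epicurious above (and by Fudzilla and Gamasutra below) are (compiled regex, replacement callable) pairs that calibre applies to the raw HTML before it is parsed. The effect is equivalent to this self-contained example with sample markup:

    import re

    # Each entry: (compiled pattern, function taking a match object and
    # returning the replacement string); applied in order with pattern.sub().
    preprocess_regexps = [
        (re.compile(r'_116\.jpg', re.IGNORECASE), lambda match: '.jpg'),
    ]

    raw_html = '<img src="photo_116.jpg">'
    for pattern, func in preprocess_regexps:
        raw_html = pattern.sub(func, raw_html)
    assert raw_html == '<img src="photo.jpg">'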

View File

@@ -1,27 +1,41 @@
 #!/usr/bin/env python

 __license__   = 'GPL v3'
-__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010 Starson17'
 '''
 fudzilla.com
 '''

+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class Fudzilla(BasicNewsRecipe):
     title                 = u'Fudzilla'
-    __author__            = 'Darko Miletic'
+    __author__            = 'Starson17'
     language              = 'en'
     description           = 'Tech news'
     oldest_article        = 7
+    remove_javascript     = True
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False

-    feeds = [ (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')]
-
-    def print_version(self, url):
-        nurl = url.replace('http://www.fudzilla.com/index.php','http://www.fudzilla.com/index2.php')
-        nmain, nsep, nrest = nurl.partition('&Itemid=')
-        return nmain + '&pop=1&page=0&Itemid=1'
+    remove_tags_before = dict(name='div', attrs={'class':['padding']})
+
+    remove_tags = [dict(name='td', attrs={'class':['left','right']}),
+                   dict(name='div', attrs={'id':['toolbar','buttons']}),
+                   dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
+                   dict(name='span', attrs={'class':['pathway']}),
+                   dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
+                   dict(name='table', attrs={'class':['headlines']}),
+                  ]
+
+    feeds = [
+             (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
+            ]
+
+    preprocess_regexps = [
+        (re.compile(r'<p class="MsoNormal"> Welcome.*</p> ', re.DOTALL|re.IGNORECASE), lambda match: '')
+        ]

View File

@@ -0,0 +1,56 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+gamasutra.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gamasutra(BasicNewsRecipe):
+    title                 = 'Gamasutra Featured articles'
+    __author__            = 'Darko Miletic'
+    description           = 'The Art and Business of Making Games'
+    publisher             = 'Gamasutra'
+    category              = 'news, games, IT'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
+    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .title{font-size: x-large; font-weight: bold} '
+
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : True
+                         }
+
+    preprocess_regexps = [
+                           (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
+                          ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
+                          ,(re.compile(r'</head>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
+                         ]
+
+    remove_tags = [
+                    dict(name=['object','embed','iframe'])
+                   ,dict(attrs={'class':'adBox'})
+                  ]
+    remove_tags_before = dict(attrs={'class':'title'})
+    remove_attributes  = ['width','height','name']
+
+    feeds = [(u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles')]
+
+    def print_version(self, url):
+        return url + '?print=1'
+
+    def get_article_url(self, article):
+        return article.get('guid', None)
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)

View File

@@ -0,0 +1,45 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+gamasutra.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gamasutra(BasicNewsRecipe):
+    title                 = 'Gamasutra News'
+    __author__            = 'Darko Miletic'
+    description           = 'The Art and Business of Making Games'
+    publisher             = 'Gamasutra'
+    category              = 'news, games, IT'
+    oldest_article        = 2
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
+    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .newsTitle{font-size: xx-large; font-weight: bold} '
+
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : True
+                         }
+
+    remove_tags       = [dict(attrs={'class':['relatedNews','adBox']})]
+    keep_only_tags    = [dict(attrs={'class':['newsTitle','newsAuth','newsDate','newsText']})]
+    remove_attributes = ['width','height']
+
+    feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')]
+
+    def get_article_url(self, article):
+        return article.get('guid', None)
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return self.adeify_images(soup)
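Nearly every preprocess_html in this commit converges on one idiom: strip inline style attributes, then return self.adeify_images(soup), a BasicNewsRecipe helper that massages img tags for Adobe Digital Editions compatibility. The style-stripping half, shown in isolation with the same BeautifulSoup 3 API these recipes import:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<div style="color:red"><p style="margin:0">text</p></div>')
    # findAll(style=True) matches every tag that carries a style attribute;
    # deleting the key removes the attribute in place.
    for item in soup.findAll(style=True):
        del item['style']
    print soup  # <div><p>text</p></div>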

View File

@@ -1,7 +1,6 @@
-#!/usr/bin/env python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 glassrpske.com
@@ -9,7 +8,6 @@ glassrpske.com
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

 class GlasSrpske(BasicNewsRecipe):
     title = 'Glas Srpske'
@@ -22,20 +20,16 @@ class GlasSrpske(BasicNewsRecipe):
     no_stylesheets       = True
     encoding             = 'utf-8'
     use_embedded_content = False
-    cover_url            = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
-    lang                 = 'sr-BA'
+    masthead_url         = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
     language             = 'sr'
     INDEX                = 'http://www.glassrpske.com'
-    extra_css            = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    extra_css            = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} '

     conversion_options = {
                           'comment'      : description
                         , 'tags'         : category
                         , 'publisher'    : publisher
-                        , 'language'     : lang
-                        , 'pretty_print' : True
+                        , 'language'     : language
                          }

     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -63,11 +57,7 @@ class GlasSrpske(BasicNewsRecipe):
                   ]

     def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang'] = self.lang
-        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
-        soup.head.insert(0,mlang)
-        return soup
+        return self.adeify_images(soup)

     def parse_index(self):
         totalfeeds = []

View File

@@ -1,7 +1,6 @@
-#!/usr/bin/env python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.glas-javnosti.rs
 '''
@@ -18,18 +17,14 @@ class GlasJavnosti(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets        = False
     use_embedded_content  = False
     language              = 'sr'
-    lang                  = 'sr-Latn-RS'
-    direction             = 'ltr'
-    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '

     conversion_options = {
                           'comment'      : description
                         , 'tags'         : category
                         , 'publisher'    : publisher
-                        , 'language'     : lang
-                        , 'pretty_print' : True
+                        , 'language'     : language
                          }

View File

@@ -9,17 +9,15 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class heiseDe(BasicNewsRecipe):
     title = 'heise'
     description = 'Computernews from Germany'
     __author__ = 'Oliver Niesner'
     language = 'de'
     use_embedded_content = False
     timefmt = ' [%d %b %Y]'
     max_articles_per_feed = 40
     no_stylesheets = True

     remove_tags = [dict(id='navi_top'),
                    dict(id='navi_bottom'),
                    dict(id='logo'),
@@ -35,12 +33,10 @@ class heiseDe(BasicNewsRecipe):
                    dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
                    dict(name='p', attrs={'class':'news_option'}),
                    dict(name='p', attrs={'class':'news_navi'}),
-                   dict(name='p', attrs={'class':'news_foren'})]
+                   dict(name='div', attrs={'class':'news_foren'})]

-    remove_tags_after = [dict(name='p', attrs={'class':'news_foren'})]
+    remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})]

     feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]

View File

@@ -1,17 +1,41 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from calibre.web.feeds.news import BasicNewsRecipe

 class HoustonChronicle(BasicNewsRecipe):

     title       = u'The Houston Chronicle'
     description = 'News from Houston, Texas'
-    __author__  = 'Kovid Goyal'
+    __author__  = 'Kovid Goyal and Sujata Raman'
     language    = 'en'
     timefmt     = ' [%a, %d %b, %Y]'
     no_stylesheets = True

-    keep_only_tags = [dict(id=['story-head', 'story'])]
-    remove_tags = [dict(id=['share-module', 'resource-box',
-        'resource-box-header'])]
+    keep_only_tags = [
+                      dict(id=['story-head', 'story'])
+                     ]
+
+    remove_tags = [
+                   dict(id=['share-module', 'resource-box',
+                       'resource-box-header'])
+                  ]
+
+    extra_css = '''
+        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
+        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
+        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
+        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
+        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
+        #story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
+        #story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
+        #story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        #story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
+        #story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
+        #Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
+        .p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
+        .p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
+        '''

     def parse_index(self):
         soup = self.index_to_soup('http://www.chron.com/news/')
@@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
             feeds.append((current_section, current_articles))

         return feeds

View File

@ -3,7 +3,7 @@ import re
class HuffingtonPostRecipe(BasicNewsRecipe): class HuffingtonPostRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal and Archana Raman'
language = 'en' language = 'en'
version = 2 version = 2
@ -14,70 +14,89 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
oldest_article = 1.1 oldest_article = 1.1
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = True #use_embedded_content = True
encoding = 'utf-8' encoding = 'utf-8'
remove_empty_feeds = True remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
# Feeds from: http://www.huffingtonpost.com/syndication/ # Feeds from: http://www.huffingtonpost.com/syndication/
feeds = [] feeds = []
feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews')) feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))
#feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml')) feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml')) #feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml')) #feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
#feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml')) feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml')) #feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml')) #feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
#feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml')) feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
    #feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
    #feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
    feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
    #feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
    #feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
    feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
    #feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
    #feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
    feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
    #feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
    #feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
    feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
    #feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
    #feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
    feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
    #feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
    #feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
    feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
    #feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
    #feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
    feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
    #feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
    #feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
    feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
    #feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))

    remove_tags = []
    remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
    remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
    remove_tags.append(dict(name='a', attrs={'class' : 'home_pixie'}))
    remove_tags.append(dict(name='div', attrs={'id' : ["top_nav",'threeup_top_wrapper','breaking_news_container',"hp_social_network"]}))
    remove_tags.append(dict(name='img', alt="Connect"))
    remove_tags.append(dict(name='div', attrs={'class' : ['logo']})) #'share_boxes_box_block_b_wraper',
    remove_tags.append(dict(name='div', attrs={'class' :[ 'read_more with_verticals','chicklets_box_outter_v05','blogger_menu_content','chicklets_bar']}))
    remove_tags.append(dict(name='div', attrs={'class' : ['sidebar_blog_first_design','sidebar_blog_second_design',]}))
    remove_tags.append(dict(name='div', attrs={'class' : ['main_big_news_ontop','login-menu','sidebar_blog_third_design','read_more']}))

    remove_tags_after = [dict(name='div', attrs={'class' : 'entry_content'}) ]

    remove_attributes = ['style']

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
        #title_permalink{color:black;font-size:large;}
        .date{color:#858585;font-family:"Times New Roman",sans-serif;}
        .comments_datetime v05{color:#696969;}
        .teaser_permalink{font-style:italic;font-size:xx-small;}
        .blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
        '''
    #a[href]{color: blue; text-decoration: none; cursor: pointer;}
    def get_article_url(self, article):
        """
        Workaround for Feedparser behaviour. If an item has more than one <link/> element, article.link is empty and
@@ -85,10 +104,21 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
        Todo: refactor to searching this list to avoid the hardcoded zero-index
        """
        link = article.get('link')
        print("Link:"+link)
        if not link:
            links = article.get('links')
            if links:
                link = links[0]['href']
                if not links[0]['href']:
                    link = links[1]['href']
        return link

    def postprocess_html(self, soup, first_fetch):
        for tag in soup.findAll('div',text = "What's Your Reaction?"):
            tag.extract()
        for tg in soup.findAll('blockquote'):
            tg.extract()
        return soup
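The fallback above exists because feedparser leaves entry.link empty when an item carries several <link/> elements, stashing the candidates in entry.links instead. A self-contained illustration, with a hypothetical entry dict standing in for a real feed item:

# Hypothetical feedparser entry: 'link' is empty because the item had
# several <link/> elements; the usable candidates sit in 'links'.
entry = {'link': '', 'links': [{'href': ''}, {'href': 'http://www.huffingtonpost.com/example-post.html'}]}

def resolve_link(entry):
    link = entry.get('link')
    if not link:
        links = entry.get('links')
        if links:
            # prefer the first href, fall back to the second if it is empty
            link = links[0]['href'] or links[1]['href']
    return link

print(resolve_link(entry))  # -> 'http://www.huffingtonpost.com/example-post.html'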
@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe

class JournalofHospitalMedicine(BasicNewsRecipe):

    title = 'Journal of Hospital Medicine'
    __author__ = 'Krittika Goyal'
    description = 'Medical news'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = True

    no_stylesheets = True
    #remove_tags_before = dict(name='div', attrs={'align':'center'})
    #remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':'subContent'}),
        dict(name='div', attrs={'id':['contentFrame']}),
        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
        #dict(name='table', attrs={'align':'RIGHT'}),
    ]

    # TO LOGIN
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www3.interscience.wiley.com/cgi-bin/home')
        br.select_form(name='siteLogin')
        br['LoginName'] = self.username
        br['Password'] = self.password
        response = br.submit()
        raw = response.read()
        if 'userName = ""' in raw:
            raise Exception('Login failed. Check your username and password')
        return br

    #TO GET ARTICLE TOC
    def johm_get_index(self):
        return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')

    # To parse article toc
    def parse_index(self):
        parse_soup = self.johm_get_index()

        div = parse_soup.find(id='contentCell')

        current_section = None
        current_articles = []
        feeds = []
        for x in div.findAll(True):
            if x.name == 'h4':
                # Section heading found
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(x)
                current_articles = []
                self.log('\tFound section:', current_section)
            if current_section is not None and x.name == 'strong':
                title = self.tag_to_string(x)
                p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
                if p is None:
                    continue
                url = p.get('href', False)
                if not url or not title:
                    continue
                if url.startswith('/'):
                    url = 'http://www3.interscience.wiley.com'+url
                url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                #if url.startswith('/'):
                    #url = 'http://online.wsj.com'+url
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})

        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds

    def preprocess_html(self, soup):
        for img in soup.findAll('img', src=True):
            img['src'] = img['src'].replace('tfig', 'nfig')
        return soup
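parse_index() above uses an accumulate-and-flush walk: each h4 opens a new section, each strong beneath it adds an article, and the section is appended when the next heading (or the end of the page) arrives. The same pattern, with stand-in data instead of a soup:

# Accumulate-and-flush over a flat tag stream (stand-in data, not real soup).
tags = [('h4', 'Reviews'), ('strong', 'Article A'), ('strong', 'Article B'),
        ('h4', 'Editorials'), ('strong', 'Article C')]
feeds, current_section, current_articles = [], None, []
for name, text in tags:
    if name == 'h4':                      # new section: flush the previous one
        if current_section and current_articles:
            feeds.append((current_section, current_articles))
        current_section, current_articles = text, []
    elif current_section and name == 'strong':
        current_articles.append(text)
if current_section and current_articles:  # flush the final section
    feeds.append((current_section, current_articles))
print(feeds)  # [('Reviews', ['Article A', 'Article B']), ('Editorials', ['Article C'])]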
@@ -0,0 +1,37 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'

'''
Kukuburi.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class KukuburiRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'en'

    title = u'Kukuburi'
    publisher = u'Ramón Pérez'
    description = u'KUKUBURI by Ramón Pérez'

    no_stylesheets = True
    remove_javascript = True

    oldest_article = 100
    max_articles_per_feed = 100

    feeds = [
        (u'Kukuburi', u'http://feeds2.feedburner.com/Kukuburi')
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<!--.*?-->', lambda match: ''),
            (r'<div class="feedflare".*?</div>', lambda match: '')
        ]
    ]
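For reference, BasicNewsRecipe applies each preprocess_regexps entry as a (pattern, callback) pair substituted over the downloaded source. A minimal stand-alone illustration of the comment-stripping rules above:

import re

preprocess_regexps = [
    (re.compile(r'<!--.*?-->', re.IGNORECASE | re.DOTALL), lambda match: ''),
    (re.compile(r'<div class="feedflare".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: ''),
]

raw = '<p>kept</p><!-- stripped --><div class="feedflare">stripped</div>'
for pattern, callback in preprocess_regexps:
    raw = pattern.sub(callback, raw)   # each rule rewrites the raw HTML in turn
print(raw)  # -> '<p>kept</p>'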
@@ -1,7 +1,5 @@

__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lasegunda.com
'''
@@ -19,43 +17,38 @@ class LaSegunda(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_empty_feeds = True
    language = 'es'
    extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '

    conversion_options = {
        'comment'            : description
        , 'tags'             : category
        , 'publisher'        : publisher
        , 'language'         : language
        , 'linearize_tables' : True
    }

    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
    remove_tags = [dict(name='img')]
    remove_attributes = ['width','height']

    feeds = [
        (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
        ,(u'Politica', u'http://www.lasegunda.com/rss20/index.asp?canal=21')
        ,(u'Cronica', u'http://www.lasegunda.com/rss20/index.asp?canal=20')
        ,(u'Internacional', u'http://www.lasegunda.com/rss20/index.asp?canal=23')
        ,(u'Deportes', u'http://www.lasegunda.com/rss20/index.asp?canal=24')
        ,(u'Epectaculos/Cultura', u'http://www.lasegunda.com/rss20/index.asp?canal=25')
        ,(u'Educacion', u'http://www.lasegunda.com/rss20/index.asp?canal=26')
        ,(u'Ciencia y Tecnologia', u'http://www.lasegunda.com/rss20/index.asp?canal=27')
        ,(u'Solidaridad', u'http://www.lasegunda.com/rss20/index.asp?canal=28')
        ,(u'Buena Vida', u'http://www.lasegunda.com/rss20/index.asp?canal=32')
    ]

    def print_version(self, url):
        rest, sep, article_id = url.partition('index.asp?idnoticia=')
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
@@ -1,7 +1,5 @@

__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
latercera.com
'''
@@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True
    language = 'es'

    conversion_options = {
        'comment'            : description
        , 'tags'             : category
        , 'publisher'        : publisher
        , 'language'         : language
        , 'linearize_tables' : True
    }

    keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]

    remove_tags = [
        dict(name=['ul','input','base'])
        ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
        ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
        ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
    ]

    feeds = [
        (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
        ,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
        ,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
        ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
        ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
        ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
@@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
@@ -1,11 +1,11 @@

__license__ = 'GPL v3'
__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
'''
newscientist.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class NewScientist(BasicNewsRecipe):
@@ -15,12 +15,14 @@ class NewScientist(BasicNewsRecipe):
    language = 'en'
    publisher = 'New Scientist'
    category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
    delay = 3
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    cover_url = 'http://www.newscientist.com/currentcover.jpg'
    masthead_url = 'http://www.newscientist.com/img/misc/ns_logo.jpg'
    encoding = 'utf-8'
    extra_css = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} '

    conversion_options = {
        'comment' : description
@@ -28,14 +30,18 @@ class NewScientist(BasicNewsRecipe):
        , 'publisher' : publisher
        , 'language' : language
    }

    preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]

    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})]

    remove_tags = [
        dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
        ,dict(name='div' , attrs={'id'   :['compnl','artIssueInfo','artTools']})
        ,dict(name='p'   , attrs={'class':['marker','infotext' ]})
        ,dict(name='meta' , attrs={'name' :'description' })
    ]
    remove_tags_after = dict(attrs={'class':'nbpcopy'})
    remove_attributes = ['height','width']

    feeds = [
        (u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' )
@@ -50,9 +56,15 @@ class NewScientist(BasicNewsRecipe):
    ]

    def get_article_url(self, article):
        return article.get('guid', None)

    def print_version(self, url):
        return url + '?full=true&print=true'

    def preprocess_html(self, soup):
        for tg in soup.findAll('a'):
            if tg.string == 'Home':
                tg.parent.extract()
        return self.adeify_images(soup)
@@ -1,7 +1,5 @@

__license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
nspm.rs
'''
@@ -21,17 +19,16 @@ class Nspm(BasicNewsRecipe):
    use_embedded_content = False
    INDEX = 'http://www.nspm.rs/?alphabet=l'
    encoding = 'utf-8'
    language = 'sr'
    masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
        , 'linearize_tables' : True
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -39,6 +36,8 @@ class Nspm(BasicNewsRecipe):
        dict(name=['link','object','embed'])
        ,dict(name='td', attrs={'class':'buttonheading'})
    ]
    remove_tags_after = dict(attrs={'class':'article_separator'})
    remove_attributes = ['width','height']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@@ -51,17 +50,6 @@ class Nspm(BasicNewsRecipe):
        return url.replace('.html','/stampa.html')

    def preprocess_html(self, soup):
        for item in soup.body.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
@@ -7,10 +7,11 @@ sfgate.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class SanFranciscoChronicle(BasicNewsRecipe):

    title = u'San Francisco Chronicle'
    __author__ = u'Darko Miletic and Sujata Raman'
    description = u'San Francisco news'
    language = 'en'
@@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False

    remove_tags_before = {'id':'printheader'}

    remove_tags = [
        dict(name='div',attrs={'id':'printheader'})
        ,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
        ,dict(name='div',attrs={'id':'footer'})
    ]

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
        .byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .dtlcomment{font-style:italic;}
        .georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
        '''

    feeds = [
        (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
    ]

    def print_version(self,url):
        url = url + "&type=printable"
        return url

    def get_article_url(self, article):
        print(str(article['title_detail']['value']))
        url = article.get('guid',None)
        url = "http://www.sfgate.com/cgi-bin/article.cgi?f="+url
        if "Presented By:" in str(article['title_detail']['value']):
            url = ''
        return url
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class SanFranciscoBayGuardian(BasicNewsRecipe):
    title = u'San Francisco Bay Guardian'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    #encoding = 'latin1'

    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
    remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
    remove_tags = [
        dict(name='iframe'),
        #dict(name='div', attrs={'class':'related-articles'}),
        dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
        dict(name='ul', attrs={'id':'story_tabs'}),
    ]

    feeds = [
        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
        ('News', 'http://www.newsobserver.com/102/index.rss'),
        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
        ('Business', 'http://www.newsobserver.com/104/index.rss'),
        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'story_body'})
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
@@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class SmithsonianMagazine(BasicNewsRecipe):
    title = u'Smithsonian Magazine'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 31 #days
    max_articles_per_feed = 50
    #encoding = 'latin1'
    recursions = 1
    match_regexps = ['&page=[2-9]$']

    remove_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after = dict(name='p', attrs={'id':'articlePaginationWrapper'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':'article_sidebar_border'}),
        dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
        dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}),
    ]

    feeds = [
        ('History and Archeology',
         'http://feeds.feedburner.com/smithsonianmag/history-archaeology'),
        ('People and Places',
         'http://feeds.feedburner.com/smithsonianmag/people-places'),
        ('Science and Nature',
         'http://feeds.feedburner.com/smithsonianmag/science-nature'),
        ('Arts and Culture',
         'http://feeds.feedburner.com/smithsonianmag/arts-culture'),
        ('Travel',
         'http://feeds.feedburner.com/smithsonianmag/travel'),
    ]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'article-left'})
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup

    def postprocess_html(self, soup, first):
        for p in soup.findAll(id='articlePaginationWrapper'):
            p.extract()
        if not first:
            for div in soup.findAll(id='article-head'):
                div.extract()
        return soup
@@ -9,16 +9,35 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
    title = u'Strategy+Business'
    publisher = u' Booz & Company'
    category = u'Business'
    description = (u'Business magazine for senior business executives and the people who influence them. '
        'Go to http://www.strategy-business.com/registration to sign up for a free account')
    oldest_article = 13 * 7 # 3 months
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    needs_subscription = True

    no_stylesheets = True
    remove_javascript = True

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.strategy-business.com/registration')
        for i, f in enumerate(br.forms()):
            if 'gatekeeper_edit' in f.name:
                br.select_form(name=f.name)
                for c in f.controls:
                    if c.name.endswith('_email'):
                        br[c.name] = self.username
                    elif c.name.endswith('_password'):
                        br[c.name] = self.password
        raw = br.submit().read()
        if '>Logout' not in raw:
            raise ValueError('Failed to login, check your username and password')
        return br

    extra_css = '''
        body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
        a {text-decoration: none; color: blue;}
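A condensed sketch of the form-discovery pattern get_browser() uses above. The registration URL and the 'gatekeeper_edit' / field-suffix names are taken from the recipe; the credentials and the extra None-guards are placeholders and defensive assumptions:

import mechanize

def login(username, password):
    br = mechanize.Browser()
    br.open('http://www.strategy-business.com/registration')
    for f in br.forms():
        # form names can be None, so guard before the substring test
        if f.name and 'gatekeeper_edit' in f.name:
            br.select_form(name=f.name)
            for c in f.controls:
                if c.name and c.name.endswith('_email'):
                    br[c.name] = username
                elif c.name and c.name.endswith('_password'):
                    br[c.name] = password
    return br.submit().read()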
@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class al(BasicNewsRecipe):
    author = 'Lorenzo Vigentini'
    description = 'The Escapist Magazine'

    cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
    title = u'the Escapist Magazine'
@@ -0,0 +1,39 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
cultofghoul.blogspot.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class TheCultOfGhoul(BasicNewsRecipe):
    title = 'The Cult of Ghoul'
    __author__ = 'Darko Miletic'
    description = 'Filmski blog'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'sr'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '

    conversion_options = {
        'comment' : description
        , 'tags' : 'film, blog, srbija, strava, uzas'
        , 'publisher': 'Dejan Ognjanovic'
        , 'language' : language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Posts', u'http://cultofghoul.blogspot.com/feeds/posts/default')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
@@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe):
        br.select_form(nr=0)
        br['user'] = self.username
        br['password'] = self.password
        res = br.submit()
        raw = res.read()
        if 'Welcome,' not in raw:
            raise ValueError('Failed to log in to wsj.com, check your '
                'username and password')
        return br

    def postprocess_html(self, soup, first):
@@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe):
        soup = self.wsj_get_index()

        year = strftime('%Y')
        for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
            txt = self.tag_to_string(x).strip()
            txt = txt.replace(u'\xa0', ' ')
            txt = txt.encode('ascii', 'ignore')
            if year in txt:
                self.timefmt = ' [%s]'%txt
                break
@@ -11,7 +11,8 @@ import sys, re, os, platform

is64bit = platform.architecture()[0] == '64bit'
iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform
isfreebsd = 'freebsd' in sys.platform
islinux = not isosx and not iswindows and not isfreebsd
SRC = os.path.abspath('src')
sys.path.insert(0, SRC)
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
@@ -117,7 +118,7 @@ class Command(object):
        self.real_user = os.environ.get('SUDO_USER', None)

    def drop_privileges(self):
        if not islinux or isosx or isfreebsd:
            return
        if self.real_user is not None:
            self.info('Dropping privileges to those of', self.real_user+':',
@@ -128,7 +129,7 @@ class Command(object):
            os.seteuid(int(self.real_uid))

    def regain_privileges(self):
        if not islinux or isosx or isfreebsd:
            return
        if os.geteuid() != 0 and self.orig_euid == 0:
            self.info('Trying to get root privileges')
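To see how the new isfreebsd flag interacts with the existing ones, a small stand-alone check (platform strings hardcoded for illustration rather than read from sys.platform):

import re

for platform in ('linux2', 'freebsd8', 'darwin', 'win32'):
    iswindows = bool(re.search('win(32|64)', platform))
    isosx = 'darwin' in platform
    isfreebsd = 'freebsd' in platform
    islinux = not isosx and not iswindows and not isfreebsd
    # exactly one flag is true for each platform string
    print((platform, iswindows, isosx, isfreebsd, islinux))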
@@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
fc_lib = '/usr/lib'
podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib'
chmlib_inc_dirs = chmlib_lib_dirs = []

if iswindows:
    prefix = r'C:\cygwin\home\kovid\sw'
@@ -96,6 +97,10 @@ if iswindows:
    sw_lib_dir = os.path.join(prefix, 'lib')
    fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
    fc_lib = sw_lib_dir
    chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
        'build', 'chmlib-0.40', 'src'))
    chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
        'build', 'chmlib-0.40', 'src', 'Release'))
    png_inc_dirs = [sw_inc_dir]
    png_lib_dirs = [sw_lib_dir]
    png_libs = ['png12']
@@ -11,15 +11,16 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile

from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
        fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
        podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
        QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
        magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
        jpg_lib_dirs, chmlib_lib_dirs
MT
isunix = islinux or isosx or isfreebsd

make = 'make' if isunix else NMAKE
@@ -56,6 +57,22 @@ if iswindows:
    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']

extensions = [
    Extension('chmlib',
            ['calibre/utils/chm/swig_chm.c'],
            libraries=['ChmLib' if iswindows else 'chm'],
            inc_dirs=chmlib_inc_dirs,
            lib_dirs=chmlib_lib_dirs,
            cflags=["-DSWIG_COBJECT_TYPES"]),

    Extension('chm_extra',
            ['calibre/utils/chm/extra.c'],
            libraries=['ChmLib' if iswindows else 'chm'],
            inc_dirs=chmlib_inc_dirs,
            lib_dirs=chmlib_lib_dirs,
            cflags=["-D__PYTHON__"]),

    Extension('pdfreflow',
            reflow_sources,
            headers=reflow_headers,
@@ -126,7 +143,7 @@ extensions = [
if iswindows:
    extensions.append(Extension('winutil',
        ['calibre/utils/windows/winutil.c'],
        libraries=['shell32', 'setupapi', 'wininet'],
        cflags=['/X']
        ))
@@ -154,6 +171,13 @@ if islinux:
    ldflags.append('-lpython'+sysconfig.get_python_version())

if isfreebsd:
    cflags.append('-pthread')
    ldflags.append('-shared')
    cflags.append('-I'+sysconfig.get_python_inc())
    ldflags.append('-lpython'+sysconfig.get_python_version())

if isosx:
    x, p = ('i386', 'ppc')
    archs = ['-arch', x, '-arch', p, '-isysroot',
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex

from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
        __appname__, __version__

HEADER = '''\
@@ -116,7 +116,7 @@ class Develop(Command):

    def pre_sub_commands(self, opts):
        if not (islinux or isfreebsd):
            self.info('\nSetting up a source based development environment is only '
                    'supported on linux. On other platforms, see the User Manual'
                    ' for help with setting up a development environment.')
@@ -156,7 +156,7 @@ class Develop(Command):
            self.warn('Failed to compile mount helper. Auto mounting of',
                    ' devices will not work')

        if not isfreebsd and os.geteuid() != 0:
            return self.warn('Must be run as root to compile mount helper. Auto '
                    'mounting of devices will not work.')
        src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
@@ -168,9 +168,10 @@ class Develop(Command):
        ret = p.wait()
        if ret != 0:
            return warn()
        if not isfreebsd:
            os.chown(dest, 0, 0)
            os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
                stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
        self.manifest.append(dest)
        return dest
@@ -42,6 +42,7 @@ class LinuxFreeze(Command):
                '/usr/lib/liblcms.so.1',
                '/tmp/calibre-mount-helper',
                '/usr/lib/libunrar.so',
                '/usr/lib/libchm.so.0',
                '/usr/lib/libsqlite3.so.0',
                '/usr/lib/libmng.so.1',
@@ -459,7 +459,7 @@ class Py2App(object):

    @flush
    def add_misc_libraries(self):
        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
            info('\nAdding', x)
            x = 'lib%s.dylib'%x
            shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
@@ -12,7 +12,7 @@ warnings.simplefilter('ignore', DeprecationWarning)

from calibre.startup import plugins, winutil, winutilerror
from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
        terminal_controller, preferred_encoding, \
        __appname__, __version__, __author__, \
        win32event, win32api, winerror, fcntl, \
@@ -22,7 +22,7 @@ import mechanize

if False:
    winutil, winutilerror, __appname__, islinux, __version__
    fcntl, win32event, isfrozen, __author__, terminal_controller
    winerror, win32api, isfreebsd

mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.44'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@@ -22,7 +22,8 @@ terminal_controller = TerminalController(sys.stdout)
iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
isosx = 'darwin' in sys.platform.lower()
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in sys.platform.lower()
islinux = not(iswindows or isosx or isfreebsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux
@@ -56,7 +57,8 @@ if plugins is None:
    sys.path.insert(0, plugin_path)
    for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
            'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
            'chm_extra'] + \
            (['winutil'] if iswindows else []) + \
            (['usbobserver'] if isosx else []):
        try:
@@ -119,11 +119,34 @@ class Plugin(object):

    def __enter__(self, *args):
        if self.plugin_path is not None:
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(self.plugin_path)
            extensions = set([x.rpartition('.')[-1].lower() for x in
                zf.namelist()])
            zip_safe = True
            for ext in ('pyd', 'so', 'dll', 'dylib'):
                if ext in extensions:
                    zip_safe = False
            if zip_safe:
                sys.path.insert(0, self.plugin_path)
                self.sys_insertion_path = self.plugin_path
            else:
                from calibre.ptempfile import TemporaryDirectory
                self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
                self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
                zf.extractall(self.sys_insertion_path)
                sys.path.insert(0, self.sys_insertion_path)
            zf.close()

    def __exit__(self, *args):
        ip, it = getattr(self, 'sys_insertion_path', None), getattr(self,
                '_sys_insertion_tdir', None)
        if ip in sys.path:
            sys.path.remove(ip)
        if hasattr(it, '__exit__'):
            it.__exit__(*args)
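The zip-safe test above decides whether a plugin zip can be imported in place or must be unpacked to a temporary directory first (python cannot load compiled pyd/so/dll/dylib code from a zip). The same check as a stand-alone helper, using only the standard library:

from zipfile import ZipFile

def is_zip_safe(path_to_zip):
    # A zip is only importable in place if it ships no compiled code.
    zf = ZipFile(path_to_zip)
    try:
        extensions = set(x.rpartition('.')[-1].lower() for x in zf.namelist())
        return not extensions.intersection(('pyd', 'so', 'dll', 'dylib'))
    finally:
        zf.close()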
class FileTypePlugin(Plugin):
@@ -70,9 +70,10 @@ class PML2PMLZ(FileTypePlugin):
        pmlz = zipfile.ZipFile(of.name, 'w')
        pmlz.write(pmlfile, os.path.basename(pmlfile))

        pml_img = os.path.splitext(pmlfile)[0] + '_img'
        i_img = os.path.join(os.path.dirname(pmlfile),'images')
        img_dir = pml_img if os.path.isdir(pml_img) else i_img if \
            os.path.isdir(i_img) else ''
        if img_dir:
            for image in glob.glob(os.path.join(img_dir, '*.png')):
                pmlz.write(image, os.path.join('images', (os.path.basename(image))))
@@ -81,17 +82,6 @@ class PML2PMLZ(FileTypePlugin):

        return of.name
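The image lookup above prefers a '<name>_img' directory beside the PML file, then a sibling 'images' directory, and otherwise gives up. The same resolution as a stand-alone sketch:

import os

def find_image_dir(pmlfile):
    pml_img = os.path.splitext(pmlfile)[0] + '_img'       # e.g. book.pml -> book_img
    i_img = os.path.join(os.path.dirname(pmlfile), 'images')
    return pml_img if os.path.isdir(pml_img) else i_img if os.path.isdir(i_img) else ''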
class ComicMetadataReader(MetadataReaderPlugin):

    name = 'Read comic metadata'
@@ -113,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
            mi.cover_data = (ext.lower(), data)
        return mi

class CHMMetadataReader(MetadataReaderPlugin):

    name        = 'Read CHM metadata'
    file_types  = set(['chm'])
    description = _('Read metadata from %s files') % 'CHM'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.chm.metadata import get_metadata
        return get_metadata(stream)

class EPUBMetadataReader(MetadataReaderPlugin):

    name = 'Read EPUB metadata'
@@ -394,7 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput

from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output
@@ -418,7 +419,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
        POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
        BOOQ, ELONEX
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
@@ -433,6 +434,7 @@ from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511
from calibre.devices.teclast.driver import TECLAST_K3

from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
@@ -454,7 +456,7 @@ plugins += [
    TCRInput,
    TXTInput,
    LRFInput,
    CHMInput,
]
plugins += [
    EPUBOutput,
@@ -508,6 +510,8 @@ plugins += [
    README,
    N516,
    EB511,
    ELONEX,
    TECLAST_K3
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
        x.__name__.endswith('MetadataReader')]
@@ -214,8 +214,21 @@ class InputFormatPlugin(Plugin):
        return ret

    def postprocess_book(self, oeb, opts, log):
        '''
        Called to allow the input plugin to perform postprocessing after
        the book has been parsed.
        '''
        pass

    def specialize(self, oeb, opts, log, output_fmt):
        '''
        Called to allow the input plugin to specialize the parsed book
        for a particular output format. Called after postprocess_book
        and before any transforms are performed on the parsed book.
        '''
        pass
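A minimal sketch of an input plugin overriding the two hooks documented above (the subclass name and log messages are hypothetical):

class MyInput(InputFormatPlugin):              # hypothetical plugin
    def postprocess_book(self, oeb, opts, log):
        log('cleaning up the parsed book')     # runs right after parsing

    def specialize(self, oeb, opts, log, output_fmt):
        if output_fmt == 'mobi':               # runs before any transforms
            log('applying MOBI-specific tweaks')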
class OutputFormatPlugin(Plugin):
    '''
    OutputFormatPlugins are responsible for converting an OEB document
@@ -235,7 +235,7 @@ class SonyReaderOutput(OutputProfile):
    description = _('This profile is intended for the SONY PRS line. '
            'The 500/505/600/700 etc.')

    screen_size = (590, 775)
    dpi = 168.451
    fbase = 12
    fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
@@ -235,6 +235,8 @@ def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
        with plugin:
            try:
                nfp = plugin.run(path_to_file)
                if not nfp:
                    nfp = path_to_file
            except:
                print 'Running file type plugin %s failed with traceback:'%plugin.name
                traceback.print_exc()
@@ -399,7 +401,7 @@ def initialize_plugins():
            plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
        except PluginNotFound:
            continue
        plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
        _initialized_plugins.append(plugin)
    except:
        print 'Failed to initialize plugin...'
@@ -23,6 +23,8 @@ Run an embedded python interpreter.
            help='Debug the specified device driver.')
    parser.add_option('-g', '--gui', default=False, action='store_true',
            help='Run the GUI',)
    parser.add_option('-w', '--viewer', default=False, action='store_true',
            help='Run the ebook viewer',)
    parser.add_option('--paths', default=False, action='store_true',
            help='Output the paths necessary to setup the calibre environment')
    parser.add_option('--migrate', action='store_true', default=False,
@@ -98,6 +100,12 @@ def main(args=sys.argv):
    if opts.gui:
        from calibre.gui2.main import main
        main(['calibre'])
    elif opts.viewer:
        from calibre.gui2.viewer.main import main
        vargs = ['ebook-viewer', '--debug-javascript']
        if len(args) > 1:
            vargs.append(args[-1])
        main(vargs)
    elif opts.command:
        sys.argv = args[:1]
        exec opts.command
@@ -60,8 +60,10 @@ def debug(ioreg_to_tmp=False, buf=None):
    if isosx:
        from calibre.devices.usbms.device import Device
        mount = repr(Device.osx_run_mount())
        drives = pprint.pformat(Device.osx_get_usb_drives())
        ioreg = 'Output from mount:\n'+mount+'\n\n'
        ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
        ioreg += Device.run_ioreg()
    connected_devices = []
    for dev in device_plugins():
        out('Looking for', dev.__class__.__name__)
@@ -15,7 +15,7 @@ class ANDROID(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
    FORMATS = ['epub', 'pdf']

    VENDOR_ID = {
        0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
@@ -195,3 +195,15 @@ class BOOQ(EB600):
    WINDOWS_MAIN_MEM = 'EB600'
    WINDOWS_CARD_A_MEM = 'EB600'

class ELONEX(EB600):

    name = 'Elonex 600EB'
    gui_name = 'Elonex'

    FORMATS = ['epub', 'pdf', 'txt', 'html']

    VENDOR_NAME = 'ELONEX'
    WINDOWS_MAIN_MEM = 'EBOOK'
    WINDOWS_CARD_A_MEM = 'EBOOK'
@@ -6,6 +6,7 @@
the GUI. A device backend must subclass the L{Device} class. See prs500.py for
a backend that implements the Device interface for the SONY PRS500 Reader.
"""
import os
from collections import namedtuple

from calibre.customize import Plugin
from calibre.constants import iswindows
@@ -43,6 +44,9 @@ class DevicePlugin(Plugin):

    #: Icon for this device
    icon = I('reader.svg')

    # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
    UserAnnotation = namedtuple('Annotation','type, bookmark')

    @classmethod
    def get_gui_name(cls):
        if hasattr(cls, 'gui_name'):
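For reference, the new UserAnnotation container is an ordinary namedtuple; a stand-alone illustration of building and unpacking one:

from collections import namedtuple

UserAnnotation = namedtuple('Annotation', 'type, bookmark')

ann = UserAnnotation(type='kindle', bookmark=None)  # bookmark would be a Bookmark object
print(ann.type)      # -> 'kindle'
print(ann.bookmark)  # -> None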
@@ -7,10 +7,9 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Amazon's Kindle
'''
import os, re, sys
from cStringIO import StringIO
from struct import unpack

from calibre.devices.usbms.driver import USBMS
@@ -44,6 +43,7 @@ class KINDLE(USBMS):
    EBOOK_DIR_CARD_A = 'documents'
    DELETE_EXTS = ['.mbp']
    SUPPORTS_SUB_DIRS = True
    SUPPORTS_ANNOTATIONS = True

    WIRELESS_FILE_NAME_PATTERN = re.compile(
        r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')
@@ -60,6 +60,73 @@ class KINDLE(USBMS):
            'replace')
        return mi
    def get_annotations(self, path_map):
        MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt']
        TAN_FORMATS = [u'tpz', u'azw1']

        mbp_formats = set()
        for fmt in MBP_FORMATS:
            mbp_formats.add(fmt)
        tan_formats = set()
        for fmt in TAN_FORMATS:
            tan_formats.add(fmt)

        def get_storage():
            storage = []
            if self._main_prefix:
                storage.append(os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN))
            if self._card_a_prefix:
                storage.append(os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A))
            if self._card_b_prefix:
                storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
            return storage

        def resolve_bookmark_paths(storage, path_map):
            pop_list = []
            book_ext = {}
            for id in path_map:
                file_fmts = set()
                for fmt in path_map[id]['fmts']:
                    file_fmts.add(fmt)

                bookmark_extension = None
                if file_fmts.intersection(mbp_formats):
                    book_extension = list(file_fmts.intersection(mbp_formats))[0]
                    bookmark_extension = 'mbp'
                elif file_fmts.intersection(tan_formats):
                    book_extension = list(file_fmts.intersection(tan_formats))[0]
                    bookmark_extension = 'tan'

                if bookmark_extension:
                    for vol in storage:
                        bkmk_path = path_map[id]['path'].replace(os.path.abspath('/<storage>'),vol)
                        bkmk_path = bkmk_path.replace('bookmark',bookmark_extension)
                        if os.path.exists(bkmk_path):
                            path_map[id] = bkmk_path
                            book_ext[id] = book_extension
                            break
                    else:
                        pop_list.append(id)
                else:
                    pop_list.append(id)

            # Remove non-existent bookmark templates
            for id in pop_list:
                path_map.pop(id)
            return path_map, book_ext

        storage = get_storage()
        path_map, book_ext = resolve_bookmark_paths(storage, path_map)

        bookmarked_books = {}
        for id in path_map:
            bookmark_ext = path_map[id].rpartition('.')[2]
            myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
            bookmarked_books[id] = self.UserAnnotation(type='kindle', bookmark=myBookmark)

        # This returns as job.result in gui2.ui.annotations_fetched(self,job)
        return bookmarked_books
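Downstream, the returned mapping is keyed by calibre book id with UserAnnotation values. A sketch of how a consumer such as the annotations_fetched handler might walk it, assuming the Bookmark fields defined below:

# bookmarked_books: {book_id: UserAnnotation(type='kindle', bookmark=Bookmark)}
for book_id, annotation in bookmarked_books.items():
    bm = annotation.bookmark
    print('book %s: %d%% read, %d notes/highlights' % (
        book_id, int(bm.percent_read), len(bm.user_notes)))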
class KINDLE2(KINDLE):
@@ -79,3 +146,213 @@ class KINDLE_DX(KINDLE2):

    PRODUCT_ID = [0x0003]
    BCD = [0x0100]
class Bookmark():
    '''
    A simple class fetching bookmark data
    Kindle-specific
    '''
    def __init__(self, path, id, book_format, bookmark_extension):
        self.book_format = book_format
        self.bookmark_extension = bookmark_extension
        self.book_length = 0
        self.id = id
        self.last_read = 0
        self.last_read_location = 0
        self.timestamp = 0
        self.user_notes = None

        self.get_bookmark_data(path)
        self.get_book_length(path)
        try:
            self.percent_read = float(100*self.last_read / self.book_length)
        except:
            self.percent_read = 0

    def record(self, n):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        if n >= self.nrecs:
            raise ValueError('non-existent record %r' % n)
        offoff = 78 + (8 * n)
        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
        stop = None
        if n < (self.nrecs - 1):
            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
        return StreamSlicer(self.stream, start, stop)

    def get_bookmark_data(self, path):
        ''' Return the timestamp and last_read_location '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        user_notes = {}
        if self.bookmark_extension == 'mbp':
            MAGIC_MOBI_CONSTANT = 150
            with open(path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.timestamp, = unpack('>I', data[0x24:0x28])
                bpar_offset, = unpack('>I', data[0x4e:0x52])
                lrlo = bpar_offset + 0x0c
                self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
                self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
                entries, = unpack('>I', data[0x4a:0x4e])

                # Store the annotations/locations
                bpl = bpar_offset + 4
                bpar_len, = unpack('>I', data[bpl:bpl+4])
                bpar_len += 8
                #print "bpar_len: 0x%x" % bpar_len
                eo = bpar_offset + bpar_len

                # Walk bookmark entries
                #print " --- %s --- " % path
                current_entry = 1
                sig = data[eo:eo+4]
                previous_block = None

                while sig == 'DATA':
                    text = None
                    entry_type = None
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    if rec_len == 0:
                        current_block = "empty_data"
                    elif data[eo+8:eo+12] == "EBAR":
                        current_block = "data_header"
                        #entry_type = "data_header"
                        location, = unpack('>I', data[eo+0x34:eo+0x38])
                        #print "data_header location: %d" % location
                    else:
                        current_block = "text_block"
                        if previous_block == 'empty_data':
                            entry_type = 'Note'
                        elif previous_block == 'data_header':
                            entry_type = 'Highlight'
                        text = data[eo+8:eo+8+rec_len].decode('utf-16-be')

                    if entry_type:
                        displayed_location = location/MAGIC_MOBI_CONSTANT + 1
                        user_notes[location] = dict(id=self.id,
                            displayed_location=displayed_location,
                            type=entry_type,
                            text=text)

                    eo += rec_len + 8
                    current_entry += 1
                    previous_block = current_block
                    sig = data[eo:eo+4]

                while sig == 'BKMK':
                    # Fix start location for Highlights using BKMK data
                    end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
                    if end_loc in user_notes and user_notes[end_loc]['type'] == 'Highlight':
                        start, = unpack('>I', data[eo+8:eo+12])
                        user_notes[start] = user_notes[end_loc]
                        user_notes.pop(end_loc)
                    elif end_loc in user_notes and user_notes[end_loc]['type'] == 'Note':
                        # Skip duplicate bookmarks for notes
                        pass
                    else:
                        # If a bookmark coincides with a user annotation, the locs could
                        # be the same - cheat by nudging -1
                        # Skip bookmark for last_read_location
                        if end_loc != self.last_read:
                            displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
                            user_notes[end_loc - 1] = dict(id=self.id,
                                displayed_location=displayed_location,
                                type='Bookmark',
                                text=None)
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    eo += rec_len + 8
                    sig = data[eo:eo+4]

        elif self.bookmark_extension == 'tan':
            # TAN bookmarks
            MAGIC_TOPAZ_CONSTANT = 33.33
            self.timestamp = os.path.getmtime(path)
            with open(path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                while current_entry < entries:
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = "(Topaz highlights not yet supported)"
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    if self.book_format in ['tpz','azw1']:
                        # *** This needs fine-tuning
                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    elif self.book_format == 'pdf':
                        # *** This needs testing
                        displayed_location = location

                    user_notes[location] = dict(id=self.id,
                        displayed_location=displayed_location,
                        type=e_type,
                        text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1

                for location in user_notes:
                    if location == self.last_read:
                        user_notes.pop(location)
                        break
        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension
        self.user_notes = user_notes

        '''
        for location in sorted(user_notes):
            print '  Location %d: %s\n%s' % (user_notes[location]['displayed_location'],
user_notes[location]['type'],
'\n'.join(self.textdump(user_notes[location]['text'])))
'''
def get_book_length(self, path):
from calibre.ebooks.metadata.mobi import StreamSlicer
book_fs = path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
self.book_length = 0
if self.bookmark_extension == 'mbp':
# Read the book len from the header
with open(book_fs,'rb') as f:
self.stream = StringIO(f.read())
self.data = StreamSlicer(self.stream)
self.nrecs, = unpack('>H', self.data[76:78])
record0 = self.record(0)
self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
elif self.bookmark_extension == 'tan':
# Read bookLength from metadata
with open(book_fs,'rb') as f:
stream = StringIO(f.read())
raw = stream.read(8*1024)
if not raw.startswith('TPZ'):
raise ValueError('Not a Topaz file')
first = raw.find('metadata')
if first < 0:
raise ValueError('Invalid Topaz file')
second = raw.find('metadata', first+10)
if second < 0:
raise ValueError('Invalid Topaz file')
raw = raw[second:second+1000]
idx = raw.find('bookLength')
if idx > -1:
length = ord(raw[idx+len('bookLength')])
self.book_length = int(raw[idx+len('bookLength')+1:idx+len('bookLength')+1+length])
else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension

View File

@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
from errno import EBUSY, ENOMEM
from calibre import iswindows, isosx, load_library
from calibre import iswindows, isosx, isfreebsd, load_library
_libusb_name = 'libusb'
PATH_MAX = 511 if iswindows else 1024 if isosx else 4096
PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
if iswindows:
class Structure(_Structure):
_pack_ = 1

View File

@ -0,0 +1,10 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,42 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.devices.usbms.driver import USBMS
class TECLAST_K3(USBMS):
name = 'Teclast K3 Device Interface'
gui_name = 'K3'
description = _('Communicate with the Teclast K3 reader.')
author = 'Kovid Goyal'
supported_platforms = ['windows', 'osx', 'linux']
# Ordered list of supported formats
FORMATS = ['epub', 'fb2', 'doc', 'pdf', 'txt']
VENDOR_ID = [0x071b]
PRODUCT_ID = [0x3203]
BCD = [0x0000]
VENDOR_NAME = 'TECLAST'
WINDOWS_MAIN_MEM = 'DIGITAL_PLAYER'
WINDOWS_CARD_A_MEM = 'DIGITAL_PLAYER'
MAIN_MEMORY_VOLUME_LABEL = 'K3 Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'K3 Storage Card'
EBOOK_DIR_MAIN = ''
EBOOK_DIR_CARD_A = ''
SUPPORTS_SUB_DIRS = True
def windows_sort_drives(self, drives):
main = drives.get('main', None)
card = drives.get('carda', None)
if card and main and card < main:
drives['main'] = card
drives['carda'] = main
return drives
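The windows_sort_drives() override exists because Windows can enumerate the K3's card reader before its main memory; mount points compare lexically, so the lower drive letter is taken to be main memory. The same logic, shown standalone on toy data:

    def sort_drives(drives):
        # swap main and carda when the card sorts before main memory
        main = drives.get('main', None)
        card = drives.get('carda', None)
        if card and main and card < main:
            drives['main'], drives['carda'] = card, main
        return drives

    print sort_drives({'main': 'F:\\', 'carda': 'E:\\'})
    # {'main': 'E:\\', 'carda': 'F:\\'}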

View File

@ -4,8 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import os, shutil, time
import shutil
from calibre.devices.errors import PathError
@ -50,11 +49,12 @@ class CLI(object):
d = os.path.dirname(path)
if not os.path.exists(d):
os.makedirs(d)
with open(path, 'wb') as dest:
with open(path, 'w+b') as dest:
try:
shutil.copyfileobj(infile, dest)
except IOError:
print 'WARNING: First attempt to send file to device failed'
time.sleep(0.2)
infile.seek(0)
dest.seek(0)
dest.truncate()
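Pieced together, the new retry logic reads as below; the mode change to 'w+b' is what makes dest.seek(0)/dest.truncate() legal on the second attempt. A sketch of the final code, not a verbatim copy:

    with open(path, 'w+b') as dest:
        try:
            shutil.copyfileobj(infile, dest)
        except IOError:
            print 'WARNING: First attempt to send file to device failed'
            time.sleep(0.2)      # give the device a moment to settle
            infile.seek(0)       # rewind both streams and retry once
            dest.seek(0)
            dest.truncate()
            shutil.copyfileobj(infile, dest)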

View File

@ -17,6 +17,7 @@ import time
import re
import sys
import glob
from itertools import repeat
from calibre.devices.interface import DevicePlugin
@ -333,10 +334,14 @@ class Device(DeviceConfig, DevicePlugin):
raise
time.sleep(2)
def _osx_bsd_names(self):
@classmethod
def osx_get_usb_drives(cls):
if usbobserver_err:
raise RuntimeError('Failed to load usbobserver: '+usbobserver_err)
drives = usbobserver.get_usb_drives()
return usbobserver.get_usb_drives()
def _osx_bsd_names(self):
drives = self.osx_get_usb_drives()
matches = []
d = self.detected_device
if d.serial:
@ -394,16 +399,6 @@ class Device(DeviceConfig, DevicePlugin):
if len(matches) > 2:
drives['cardb'] = matches[2]
pat = self.OSX_MAIN_MEM_VOL_PAT
if pat is not None and len(drives) > 1 and 'main' in drives:
if pat.search(drives['main']) is None:
main = drives['main']
for x in ('carda', 'cardb'):
if x in drives and pat.search(drives[x]):
drives['main'] = drives.pop(x)
drives[x] = main
break
return drives
def osx_bsd_names(self):
@ -427,6 +422,16 @@ class Device(DeviceConfig, DevicePlugin):
if drives['main'] is None:
print bsd_drives, mount_map, drives
raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
pat = self.OSX_MAIN_MEM_VOL_PAT
if pat is not None and len(drives) > 1 and 'main' in drives:
if pat.search(drives['main']) is None:
main = drives['main']
for x in ('carda', 'cardb'):
if x in drives and pat.search(drives[x]):
drives['main'] = drives.pop(x)
drives[x] = main
break
self._main_prefix = drives['main']+os.sep
def get_card_prefix(c):
ans = drives.get(c, None)
@ -789,7 +794,13 @@ class Device(DeviceConfig, DevicePlugin):
'''
return components
def create_upload_path(self, path, mdata, fname): def get_annotations(self, path_map):
'''
Resolve path_map to annotation_map of files found on the device
'''
return {}
def create_upload_path(self, path, mdata, fname, create_dirs=True):
path = os.path.abspath(path)
extra_components = []
@ -848,7 +859,7 @@ class Device(DeviceConfig, DevicePlugin):
filedir = os.path.dirname(filepath)
if not os.path.exists(filedir):
if create_dirs and not os.path.exists(filedir):
os.makedirs(filedir)
return filepath

View File

@ -123,7 +123,7 @@ class USBMS(CLI, Device):
'''
:path: the full path where the associated book is located.
:filename: the name of the book file without the extension.
:metatdata: metadata belonging to the book. Use metadata.thumbnail
:metadata: metadata belonging to the book. Use metadata.thumbnail
for cover
'''
pass

View File

@ -129,3 +129,12 @@ def render_html(path_to_html, width=590, height=750):
del loop
return renderer
def check_ebook_format(stream, current_guess):
ans = current_guess
if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1'):
stream.seek(0)
if stream.read(3) == 'TPZ':
ans = 'tpz'
stream.seek(0)
return ans
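Usage sketch (assuming this hunk lands in calibre/ebooks/__init__.py, so the helper is importable from calibre.ebooks): a Topaz file that arrived with a .prc/.mobi extension is caught by its 'TPZ' signature, while a real MOBI passes through untouched.

    from cStringIO import StringIO

    print check_ebook_format(StringIO('TPZ0' + '\x00'*16), 'mobi')  # 'tpz'
    print check_ebook_format(StringIO('BOOKMOBI'), 'mobi')          # 'mobi'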

View File

@ -53,13 +53,15 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
"x-sjis" : "shift-jis" } "x-sjis" : "shift-jis" }
def force_encoding(raw, verbose): def force_encoding(raw, verbose, assume_utf8=False):
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
try: try:
chardet = detect(raw) chardet = detect(raw)
except: except:
chardet = {'encoding':preferred_encoding, 'confidence':0} chardet = {'encoding':preferred_encoding, 'confidence':0}
encoding = chardet['encoding'] encoding = chardet['encoding']
if chardet['confidence'] < 1 and assume_utf8:
encoding = 'utf-8'
if chardet['confidence'] < 1 and verbose: if chardet['confidence'] < 1 and verbose:
print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100) print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
if not encoding: if not encoding:
@ -73,7 +75,7 @@ def force_encoding(raw, verbose):
def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
resolve_entities=False):
resolve_entities=False, assume_utf8=False):
'''
Force conversion of byte string to unicode. Tries to look for XML/HTML
encoding declaration first, if not found uses the chardet library and
@ -95,7 +97,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
encoding = match.group(1)
break
if encoding is None:
encoding = force_encoding(raw, verbose)
encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
try:
if encoding.lower().strip() == 'macintosh':
encoding = 'mac-roman'
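The effect of the new assume_utf8 flag, reduced to its decision rule (a standalone sketch; the real code also consults embedded encoding declarations and charset aliases):

    def pick_encoding(detected, confidence, assume_utf8=False):
        # below full confidence, an explicit UTF-8 hint beats the detector
        if confidence < 1 and assume_utf8:
            return 'utf-8'
        return detected

    print pick_encoding('ISO-8859-2', 0.7)                    # ISO-8859-2
    print pick_encoding('ISO-8859-2', 0.7, assume_utf8=True)  # utf-8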

View File

@ -1,213 +1,17 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, shutil, uuid, re
import os, uuid
from tempfile import mkdtemp
from mimetypes import guess_type as guess_mimetype
from BeautifulSoup import BeautifulSoup, NavigableString
from lxml import html
from pychm.chm import CHMFile
from pychm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate,
)
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.customize.conversion import InputFormatPlugin
from calibre.utils.config import OptionParser
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata.toc import TOC
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
def match_string(s1, s2_already_lowered):
if s1 is not None and s2_already_lowered is not None:
if s1.lower()==s2_already_lowered:
return True
return False
def check_all_prev_empty(tag):
if tag is None:
return True
if tag.__class__ == NavigableString and not check_empty(tag):
return False
return check_all_prev_empty(tag.previousSibling)
def check_empty(s, rex = re.compile(r'\S')):
return rex.search(s) is None
def option_parser():
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help=_("Set the book title"))
parser.add_option('--title-sort', action='store', type='string', default=None,
dest='title_sort', help=_('Set sort key for the title'))
parser.add_option("-a", "--author", action="store", type="string", \
dest="author", help=_("Set the author"))
parser.add_option('--author-sort', action='store', type='string', default=None,
dest='author_sort', help=_('Set sort key for the author'))
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help=_("The category this book belongs"
" to. E.g.: History"))
parser.add_option("--thumbnail", action="store", type="string", \
dest="thumbnail", help=_("Path to a graphic that will be"
" set as this files' thumbnail"))
parser.add_option("--comment", action="store", type="string", \
dest="freetext", help=_("Path to a txt file containing a comment."))
parser.add_option("--get-thumbnail", action="store_true", \
dest="get_thumbnail", default=False, \
help=_("Extract thumbnail from LRF file"))
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
parser.add_option('--classification', default=None, help=_('Set the book classification'))
parser.add_option('--creator', default=None, help=_('Set the book creator'))
parser.add_option('--producer', default=None, help=_('Set the book producer'))
parser.add_option('--get-cover', action='store_true', default=False,
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
parser.add_option('--bookid', action='store', type='string', default=None,
dest='book_id', help=_('Set book ID'))
parser.add_option('--font-delta', action='store', type='int', default=0,
dest='font_delta', help=_('Set font delta'))
return parser
class CHMError(Exception):
pass
class CHMReader(CHMFile):
def __init__(self, input, log):
CHMFile.__init__(self)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self._sourcechm = input
self._contents = None
self._playorder = 0
self._metadata = False
self._extracted = False
# location of '.hhc' file, which is the CHM TOC.
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc"
def _parse_toc(self, ul, basedir=os.getcwdu()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1
for li in ul('li', recursive=False):
href = li.object('param', {'name': 'Local'})[0]['value']
if href.count('#'):
href, frag = href.split('#')
else:
frag = None
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
#print "========>", name
toc.add_item(href, frag, name, play_order=self._playorder)
self._playorder += 1
if li.ul:
child = self._parse_toc(li.ul)
child.parent = toc
toc.append(child)
#print toc
return toc
def GetFile(self, path):
# have to have abs paths for ResolveObject, but Contents() deliberately
# makes them relative. So we don't have to worry, re-add the leading /.
# note this path refers to the internal CHM structure
if path[0] != '/':
path = '/' + path
res, ui = self.ResolveObject(path)
if res != CHM_RESOLVE_SUCCESS:
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
size, data = self.RetrieveObject(ui)
if size == 0:
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data
def ExtractFiles(self, output_dir=os.getcwdu()):
for path in self.Contents():
lpath = os.path.join(output_dir, path)
self._ensure_dir(lpath)
data = self.GetFile(path)
with open(lpath, 'wb') as f:
if guess_mimetype(path)[0] == ('text/html'):
data = self._reformat(data)
f.write(data)
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
self._extracted = True
def _reformat(self, data):
try:
soup = BeautifulSoup(data)
except UnicodeEncodeError:
# hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :("
return data
# nuke javascript...
[s.extract() for s in soup('script')]
# remove forward and back nav bars from the top/bottom of each page
# cos they really fuck with the flow of things and generally waste space
# since we can't use [a,b] syntax to select arbitrary items from a list
# we'll have to do this manually...
t = soup('table')
if t:
if (t[0].previousSibling is None
or t[0].previousSibling.previousSibling is None):
t[0].extract()
if (t[-1].nextSibling is None
or t[-1].nextSibling.nextSibling is None):
t[-1].extract()
# for some very odd reason each page's content appears to be in a table
# too. and this table has sub-tables for random asides... grr.
# remove br at top of page if present after nav bars removed
br = soup('br')
if br:
if check_all_prev_empty(br[0].previousSibling):
br[0].extract()
# some images seem to be broken in some chm's :/
for img in soup('img'):
try:
# some are supposedly "relative"... lies.
while img['src'].startswith('../'): img['src'] = img['src'][3:]
# some have ";<junk>" at the end.
img['src'] = img['src'].split(';')[0]
except KeyError:
# and some don't even have a src= ?!
pass
# now give back some pretty html.
return soup.prettify()
def Contents(self):
if self._contents is not None:
return self._contents
paths = []
def get_paths(chm, ui, ctx):
# skip directories
# note this path refers to the internal CHM structure
if ui.path[-1] != '/':
# and make paths relative
paths.append(ui.path.lstrip('/'))
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
self._contents = paths
return self._contents
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)
class CHMInput(InputFormatPlugin):
name = 'CHM Input'
@ -215,52 +19,49 @@ class CHMInput(InputFormatPlugin):
description = 'Convert CHM files to OEB'
file_types = set(['chm'])
options = set([
OptionRecommendation(name='dummy_option', recommended_value=False,
help=_('dummy option until real options are determined.')),
])
def _chmtohtml(self, output_dir, chm_path, no_images, log):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log)
log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir)
self._chm_reader = rdr
return rdr.hhc_path
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.ebooks.metadata.chm import get_metadata_ from calibre.ebooks.chm.metadata import get_metadata_from_reader
from calibre.customize.ui import plugin_for_input_format
log.debug('Processing CHM...')
tdir = mkdtemp(prefix='chm2oeb_') with TemporaryDirectory('_chm2oeb') as tdir:
from calibre.customize.ui import plugin_for_input_format html_input = plugin_for_input_format('html')
html_input = plugin_for_input_format('html') for opt in html_input.options:
for opt in html_input.options: setattr(options, opt.option.name, opt.recommended_value)
setattr(options, opt.option.name, opt.recommended_value) options.input_encoding = 'utf-8'
options.input_encoding = 'utf-8' no_images = False #options.no_images
no_images = False #options.no_images chm_name = stream.name
chm_name = stream.name #chm_data = stream.read()
#chm_data = stream.read()
#closing stream so CHM can be opened by external library
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
mainpath = os.path.join(tdir, mainname)
#raw_input()
metadata = get_metadata_(tdir) metadata = get_metadata_from_reader(self._chm_reader)
odi = options.debug_pipeline
options.debug_pipeline = None
# try a custom conversion:
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
# try using html converter:
htmlpath = self._create_html_root(mainpath, log)
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
options.debug_pipeline = odi
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
shutil.rmtree(tdir)
return oeb
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
@ -369,6 +170,9 @@ class CHMInput(InputFormatPlugin):
# check that node is a normal node (not a comment, DOCTYPE, etc.)
# (normal nodes have string tags)
if isinstance(node.tag, basestring):
from calibre.ebooks.chm.reader import match_string
chapter_path = None
if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
for child in node:
if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):

View File

@ -0,0 +1,157 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import string_to_authors, MetaInformation
from calibre.utils.logging import default_log
from calibre.ptempfile import TemporaryFile
def _clean(s):
return s.replace(u'\u00a0', u' ')
def _detag(tag):
str = u""
for elem in tag:
if hasattr(elem, "contents"):
str += _detag(elem)
else:
str += _clean(elem)
return str
def _metadata_from_table(soup, searchfor):
td = soup.find('td', text=re.compile(searchfor, flags=re.I))
if td is None:
return None
td = td.parent
# there appears to be multiple ways of structuring the metadata
# on the home page. cue some nasty special-case hacks...
if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I):
meta = _detag(td.findNextSibling('td'))
return re.sub('^:', '', meta).strip()
else:
meta = _detag(td)
return re.sub(r'^[^:]+:', '', meta).strip()
def _metadata_from_span(soup, searchfor):
span = soup.find('span', {'class': re.compile(searchfor, flags=re.I)})
if span is None:
return None
# this metadata might need some cleaning up still :/
return _detag(span.renderContents().strip())
def _get_authors(soup):
aut = (_metadata_from_span(soup, r'author')
or _metadata_from_table(soup, r'^\s*by\s*:?\s+'))
ans = [_('Unknown')]
if aut is not None:
ans = string_to_authors(aut)
return ans
def _get_publisher(soup):
return (_metadata_from_span(soup, 'imprint')
or _metadata_from_table(soup, 'publisher'))
def _get_isbn(soup):
return (_metadata_from_span(soup, 'isbn')
or _metadata_from_table(soup, 'isbn'))
def _get_comments(soup):
date = (_metadata_from_span(soup, 'cwdate')
or _metadata_from_table(soup, 'pub date'))
pages = ( _metadata_from_span(soup, 'pages')
or _metadata_from_table(soup, 'pages'))
try:
# date span can have copyright symbols in it...
date = date.replace(u'\u00a9', '').strip()
# and pages often comes as '(\d+ pages)'
pages = re.search(r'\d+', pages).group(0)
return u'Published %s, %s pages.' % (date, pages)
except:
pass
return None
def _get_cover(soup, rdr):
ans = None
try:
ans = soup.find('img', alt=re.compile('cover', flags=re.I))['src']
except TypeError:
# meeehh, no handy alt-tag goodness, try some hackery
# the basic idea behind this is that in general, the cover image
# has a height:width ratio of ~1.25, whereas most of the nav
# buttons are decidedly less than that.
# what we do in this is work out that ratio, take 1.25 off it and
# save the absolute value when we sort by this value, the smallest
# one is most likely to be the cover image, hopefully.
r = {}
for img in soup('img'):
try:
r[abs(float(img['height'])/float(img['width'])-1.25)] = img['src']
except KeyError:
# interestingly, occasionally the only image without height
# or width attrs is the cover...
r[0] = img['src']
l = r.keys()
l.sort()
ans = r[l[0]]
# this link comes from the internal html, which is in a subdir
if ans is not None:
try:
ans = rdr.GetFile(ans)
except:
ans = rdr.root + "/" + ans
try:
ans = rdr.GetFile(ans)
except:
ans = None
if ans is not None:
from PIL import Image
from cStringIO import StringIO
buf = StringIO()
try:
Image.open(StringIO(ans)).convert('RGB').save(buf, 'JPEG')
ans = buf.getvalue()
except:
ans = None
return ans
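The aspect-ratio fallback in _get_cover() on toy data: score each image by how far height/width is from 1.25 and keep the closest (a minimal demo; real pages also contain images with missing attributes, which are handled above):

    images = {'nav_next.gif': (32, 32), 'nav_prev.gif': (30, 60),
              'cover.jpg': (500, 400)}
    scores = {}
    for src, (h, w) in images.items():
        scores[abs(float(h)/w - 1.25)] = src
    print scores[min(scores)]  # cover.jpg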
def get_metadata_from_reader(rdr):
raw = rdr.GetFile(rdr.home)
home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0])
title = rdr.title
authors = _get_authors(home)
mi = MetaInformation(title, authors)
publisher = _get_publisher(home)
if publisher:
mi.publisher = publisher
isbn = _get_isbn(home)
if isbn:
mi.isbn = isbn
comments = _get_comments(home)
if comments:
mi.comments = comments
cdata = _get_cover(home, rdr)
if cdata is not None:
mi.cover_data = ('jpg', cdata)
return mi
def get_metadata(stream):
with TemporaryFile('_chm_metadata.chm') as fname:
with open(fname, 'wb') as f:
f.write(stream.read())
from calibre.ebooks.chm.reader import CHMReader
rdr = CHMReader(fname, default_log)
return get_metadata_from_reader(rdr)
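Hypothetical use of the new module (the file name is a placeholder; the import path is the one this commit introduces):

    from calibre.ebooks.chm.metadata import get_metadata

    with open('book.chm', 'rb') as f:
        mi = get_metadata(f)
    print mi.title, mi.authors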

View File

@ -0,0 +1,212 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, re
from mimetypes import guess_type as guess_mimetype
from BeautifulSoup import BeautifulSoup, NavigableString
from calibre.utils.chm.chm import CHMFile
from calibre.utils.chm.chmlib import (
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
chm_enumerate,
)
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC
def match_string(s1, s2_already_lowered):
if s1 is not None and s2_already_lowered is not None:
if s1.lower()==s2_already_lowered:
return True
return False
def check_all_prev_empty(tag):
if tag is None:
return True
if tag.__class__ == NavigableString and not check_empty(tag):
return False
return check_all_prev_empty(tag.previousSibling)
def check_empty(s, rex = re.compile(r'\S')):
return rex.search(s) is None
def option_parser():
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help=_("Set the book title"))
parser.add_option('--title-sort', action='store', type='string', default=None,
dest='title_sort', help=_('Set sort key for the title'))
parser.add_option("-a", "--author", action="store", type="string", \
dest="author", help=_("Set the author"))
parser.add_option('--author-sort', action='store', type='string', default=None,
dest='author_sort', help=_('Set sort key for the author'))
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help=_("The category this book belongs"
" to. E.g.: History"))
parser.add_option("--thumbnail", action="store", type="string", \
dest="thumbnail", help=_("Path to a graphic that will be"
" set as this files' thumbnail"))
parser.add_option("--comment", action="store", type="string", \
dest="freetext", help=_("Path to a txt file containing a comment."))
parser.add_option("--get-thumbnail", action="store_true", \
dest="get_thumbnail", default=False, \
help=_("Extract thumbnail from LRF file"))
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
parser.add_option('--classification', default=None, help=_('Set the book classification'))
parser.add_option('--creator', default=None, help=_('Set the book creator'))
parser.add_option('--producer', default=None, help=_('Set the book producer'))
parser.add_option('--get-cover', action='store_true', default=False,
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
parser.add_option('--bookid', action='store', type='string', default=None,
dest='book_id', help=_('Set book ID'))
parser.add_option('--font-delta', action='store', type='int', default=0,
dest='font_delta', help=_('Set font delta'))
return parser
class CHMError(Exception):
pass
class CHMReader(CHMFile):
def __init__(self, input, log):
CHMFile.__init__(self)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self._sourcechm = input
self._contents = None
self._playorder = 0
self._metadata = False
self._extracted = False
# location of '.hhc' file, which is the CHM TOC.
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc"
def _parse_toc(self, ul, basedir=os.getcwdu()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1
for li in ul('li', recursive=False):
href = li.object('param', {'name': 'Local'})[0]['value']
if href.count('#'):
href, frag = href.split('#')
else:
frag = None
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
#print "========>", name
toc.add_item(href, frag, name, play_order=self._playorder)
self._playorder += 1
if li.ul:
child = self._parse_toc(li.ul)
child.parent = toc
toc.append(child)
#print toc
return toc
def GetFile(self, path):
# have to have abs paths for ResolveObject, but Contents() deliberately
# makes them relative. So we don't have to worry, re-add the leading /.
# note this path refers to the internal CHM structure
if path[0] != '/':
path = '/' + path
res, ui = self.ResolveObject(path)
if res != CHM_RESOLVE_SUCCESS:
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
size, data = self.RetrieveObject(ui)
if size == 0:
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data
def ExtractFiles(self, output_dir=os.getcwdu()):
for path in self.Contents():
lpath = os.path.join(output_dir, path)
self._ensure_dir(lpath)
data = self.GetFile(path)
with open(lpath, 'wb') as f:
if guess_mimetype(path)[0] == ('text/html'):
data = self._reformat(data)
f.write(data)
self._extracted = True
files = os.listdir(output_dir)
if self.hhc_path not in files:
for f in files:
if f.lower() == self.hhc_path.lower():
self.hhc_path = f
break
def _reformat(self, data):
try:
soup = BeautifulSoup(data)
except UnicodeEncodeError:
# hit some strange encoding problems...
print "Unable to parse html for cleaning, leaving it :("
return data
# nuke javascript...
[s.extract() for s in soup('script')]
# remove forward and back nav bars from the top/bottom of each page
# cos they really fuck with the flow of things and generally waste space
# since we can't use [a,b] syntax to select arbitrary items from a list
# we'll have to do this manually...
t = soup('table')
if t:
if (t[0].previousSibling is None
or t[0].previousSibling.previousSibling is None):
t[0].extract()
if (t[-1].nextSibling is None
or t[-1].nextSibling.nextSibling is None):
t[-1].extract()
# for some very odd reason each page's content appears to be in a table
# too. and this table has sub-tables for random asides... grr.
# remove br at top of page if present after nav bars removed
br = soup('br')
if br:
if check_all_prev_empty(br[0].previousSibling):
br[0].extract()
# some images seem to be broken in some chm's :/
for img in soup('img'):
try:
# some are supposedly "relative"... lies.
while img['src'].startswith('../'): img['src'] = img['src'][3:]
# some have ";<junk>" at the end.
img['src'] = img['src'].split(';')[0]
except KeyError:
# and some don't even have a src= ?!
pass
# now give back some pretty html.
return soup.prettify()
def Contents(self):
if self._contents is not None:
return self._contents
paths = []
def get_paths(chm, ui, ctx):
# skip directories
# note this path refers to the internal CHM structure
if ui.path[-1] != '/':
# and make paths relative
paths.append(ui.path.lstrip('/'))
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
self._contents = paths
return self._contents
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)

View File

@ -13,6 +13,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.date import parse_date
from calibre.utils.zipfile import ZipFile
from calibre import extract, walk
DEBUG_README=u'''
@ -726,6 +727,13 @@ OptionRecommendation(name='timestamp',
else:
os.makedirs(out_dir)
self.dump_oeb(ret, out_dir)
if self.input_fmt == 'recipe':
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
'periodical.downloaded_recipe'), 'w')
zf.add_dir(out_dir)
with self.input_plugin:
self.input_plugin.save_download(zf)
zf.close()
self.log.info('Input debug saved to:', out_dir)
@ -773,26 +781,29 @@ OptionRecommendation(name='timestamp',
self.ui_reporter(0.01, _('Converting input to HTML...'))
ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
self.input_plugin.report_progress = ir
self.oeb = self.input_plugin(stream, self.opts, with self.input_plugin:
self.input_fmt, self.log, self.oeb = self.input_plugin(stream, self.opts,
accelerators, tdir) self.input_fmt, self.log,
if self.opts.debug_pipeline is not None: accelerators, tdir)
self.dump_input(self.oeb, tdir) if self.opts.debug_pipeline is not None:
if self.abort_after_input_dump: self.dump_input(self.oeb, tdir)
return if self.abort_after_input_dump:
if self.input_fmt == 'recipe': return
self.opts_to_mi(self.user_metadata) if self.input_fmt in ('recipe', 'downloaded_recipe'):
if not hasattr(self.oeb, 'manifest'): self.opts_to_mi(self.user_metadata)
self.oeb = create_oebbook(self.log, self.oeb, self.opts, if not hasattr(self.oeb, 'manifest'):
self.input_plugin) self.oeb = create_oebbook(self.log, self.oeb, self.opts,
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log) self.input_plugin)
self.opts.is_image_collection = self.input_plugin.is_image_collection self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter) self.opts.is_image_collection = self.input_plugin.is_image_collection
self.flush() pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
if self.opts.debug_pipeline is not None: self.flush()
out_dir = os.path.join(self.opts.debug_pipeline, 'parsed') if self.opts.debug_pipeline is not None:
self.dump_oeb(self.oeb, out_dir) out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
self.log('Parsed HTML written to:', out_dir) self.dump_oeb(self.oeb, out_dir)
self.log('Parsed HTML written to:', out_dir)
self.input_plugin.specialize(self.oeb, self.opts, self.log,
self.output_fmt)
pr(0., _('Running transforms on ebook...'))
@ -882,7 +893,8 @@ OptionRecommendation(name='timestamp',
our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
self.output_plugin.report_progress = our
our(0., _('Creating')+' %s'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin, with self.output_plugin:
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
self.ui_reporter(1.)
run_plugins_on_postprocess(self.output, self.output_fmt)

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, uuid
import os, uuid
from itertools import cycle
from lxml import etree
@ -19,8 +19,7 @@ class EPUBInput(InputFormatPlugin):
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
@classmethod def decrypt_font(self, key, path):
def decrypt_font(cls, key, path):
raw = open(path, 'rb').read()
crypt = raw[:1024]
key = cycle(iter(key))
@ -29,13 +28,18 @@ class EPUBInput(InputFormatPlugin):
f.write(decrypt)
f.write(raw[1024:])
@classmethod def process_encryption(self, encfile, opf, log):
def process_encryption(cls, encfile, opf, log):
key = None
m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read()) for item in opf.identifier_iter():
if m: scheme = None
key = m.group(1) for key in item.attrib.keys():
key = list(map(ord, uuid.UUID(key).bytes)) if key.endswith('scheme'):
scheme = item.get(key)
if (scheme and scheme.lower() == 'uuid') or \
(item.text and item.text.startswith('urn:uuid:')):
key = str(item.text).rpartition(':')[-1]
key = list(map(ord, uuid.UUID(key).bytes))
try:
root = etree.parse(encfile)
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
@ -46,7 +50,8 @@ class EPUBInput(InputFormatPlugin):
uri = cr.get('URI')
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
if os.path.exists(path):
cls.decrypt_font(key, path) self._encrypted_font_uris.append(uri)
self.decrypt_font(key, path)
return True
except:
import traceback
@ -115,14 +120,17 @@ class EPUBInput(InputFormatPlugin):
if opf is None:
raise ValueError('%s is not a valid EPUB file'%path)
if os.path.exists(encfile):
if not self.process_encryption(encfile, opf, log):
raise DRMError(os.path.basename(path))
opf = os.path.relpath(opf, os.getcwdu())
parts = os.path.split(opf)
opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
self._encrypted_font_uris = []
if os.path.exists(encfile):
if not self.process_encryption(encfile, opf, log):
raise DRMError(os.path.basename(path))
self.encrypted_fonts = self._encrypted_font_uris
if len(parts) > 1 and parts[0]:
delta = '/'.join(parts[:-1])+'/'
for elem in opf.itermanifest():

View File

@ -12,8 +12,9 @@ from urllib import unquote
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
from calibre import strftime, guess_type, prepare_string_for_xml
from calibre import strftime, guess_type, prepare_string_for_xml, CurrentDir
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import filesystem_encoding
from lxml import etree
@ -157,11 +158,9 @@ class EPUBOutput(OutputFormatPlugin):
self.workaround_ade_quirks()
self.workaround_webkit_quirks()
self.workaround_sony_quirks()
from calibre.ebooks.oeb.transforms.rescale import RescaleImages
RescaleImages()(oeb, opts)
from calibre.ebooks.oeb.transforms.split import Split
split = Split(not self.opts.dont_split_on_page_breaks,
max_flow_size=self.opts.flow_size*1024
@ -170,6 +169,21 @@ class EPUBOutput(OutputFormatPlugin):
self.insert_cover()
self.workaround_sony_quirks()
from calibre.ebooks.oeb.base import OPF
identifiers = oeb.metadata['identifier']
uuid = None
for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
uuid = unicode(x).split(':')[-1]
break
if uuid is None:
self.log.warn('No UUID identifier found')
from uuid import uuid4
uuid = str(uuid4())
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
with TemporaryDirectory('_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
oeb_output = plugin_for_output_format('oeb')
@ -177,10 +191,16 @@ class EPUBOutput(OutputFormatPlugin):
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
if x.endswith('.ncx')][0])
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
encryption = None
if encrypted_fonts:
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
from calibre.ebooks.epub import initialize_container
epub = initialize_container(output_path, os.path.basename(opf))
epub.add_dir(tdir)
if encryption is not None:
epub.writestr('META-INF/encryption.xml', encryption)
if opts.extract_to is not None:
if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to)
@ -189,6 +209,52 @@ class EPUBOutput(OutputFormatPlugin):
self.log.info('EPUB extracted to', opts.extract_to)
epub.close()
def encrypt_fonts(self, uris, tdir, uuid):
from binascii import unhexlify
key = re.sub(r'[^a-fA-F0-9]', '', uuid)
if len(key) < 16:
raise ValueError('UUID identifier %r is invalid'%uuid)
key = unhexlify((key + key)[:32])
key = tuple(map(ord, key))
paths = []
with CurrentDir(tdir):
paths = [os.path.join(*x.split('/')) for x in uris]
uris = dict(zip(uris, paths))
fonts = []
for uri in list(uris.keys()):
path = uris[uri]
if isinstance(path, unicode):
path = path.encode(filesystem_encoding)
if not os.path.exists(path):
uris.pop(uri)
continue
self.log.debug('Encrypting font:', uri)
with open(path, 'r+b') as f:
data = f.read(1024)
f.seek(0)
for i in range(1024):
f.write(chr(ord(data[i]) ^ key[i%16]))
if not isinstance(uri, unicode):
uri = uri.decode('utf-8')
fonts.append(u'''
<enc:EncryptedData>
<enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>
<enc:CipherData>
<enc:CipherReference URI="%s"/>
</enc:CipherData>
</enc:EncryptedData>
'''%(uri.replace('"', '\\"')))
if fonts:
ans = '''<encryption
xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
xmlns:enc="http://www.w3.org/2001/04/xmlenc#"
xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">
'''
ans += (u'\n'.join(fonts)).encode('utf-8')
ans += '\n</encryption>'
return ans
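The obfuscation itself is a symmetric XOR of the first 1024 font bytes against a 16-byte key built from the book's UUID, so the same routine both mangles and unmangles. A standalone sketch under that reading (key derivation copied from encrypt_fonts(); the uuid argument is the bare hex form, as extracted above):

    import re
    from binascii import unhexlify

    def adobe_key(uuid):
        key = re.sub(r'[^a-fA-F0-9]', '', uuid)
        return unhexlify((key + key)[:32])

    def obfuscate(data, key):
        key = map(ord, key)
        head = ''.join(chr(ord(c) ^ key[i % 16])
                       for i, c in enumerate(data[:1024]))
        return head + data[1024:]

    k = adobe_key('12345678-1234-1234-1234-123456789abc')
    font = 'fake font bytes'
    assert obfuscate(obfuscate(font, k), k) == font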
def default_cover(self):
'''
Create a generic cover for books that don't have a cover

View File

@ -20,7 +20,7 @@ from itertools import izip
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux
from calibre.constants import islinux, isfreebsd
from calibre import unicode_path
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
@ -346,7 +346,7 @@ class HTMLInput(InputFormatPlugin):
self.added_resources = {}
self.log = log
for path, href in htmlfile_map.items():
if not islinux:
if not (islinux or isfreebsd):
path = path.lower()
self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
@ -417,7 +417,7 @@ class HTMLInput(InputFormatPlugin):
if os.path.isdir(link):
self.log.warn(link_, 'is a link to a directory. Ignoring.')
return link_
if not islinux:
if not (islinux or isfreebsd):
link = link.lower()
if link not in self.added_resources:
bhref = os.path.basename(link)

View File

@ -215,6 +215,28 @@ def merge_results(one, two):
else:
one[idx].smart_update(x)
class MetadataSources(object):
def __init__(self, sources):
self.sources = sources
def __enter__(self):
for s in self.sources:
s.__enter__()
return self
def __exit__(self, *args):
for s in self.sources:
s.__exit__()
def __call__(self, *args, **kwargs):
for s in self.sources:
s(*args, **kwargs)
def join(self):
for s in self.sources:
s.join()
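A toy illustration of the new aggregator: any object implementing the context-manager, __call__ and join protocol can be driven as a group, which is all the real fetcher threads do:

    class DummySource(object):
        def __enter__(self):
            return self
        def __exit__(self, *args):
            pass
        def __call__(self, *args, **kwargs):
            print 'fetching with', args
        def join(self):
            pass

    with MetadataSources([DummySource(), DummySource()]) as manager:
        manager('A Title', 'An Author', None, None, 0)
        manager.join()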
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
verbose=0):
assert not(title is None and author is None and publisher is None and \
@ -224,11 +246,10 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
if isbn is not None:
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
with MetadataSources(fetchers) as manager:
manager(title, author, publisher, isbn, verbose)
manager.join()
for fetcher in fetchers:
fetcher(title, author, publisher, isbn, verbose)
for fetcher in fetchers:
fetcher.join()
results = list(fetchers[0].results)
for fetcher in fetchers[1:]:
merge_results(results, fetcher.results)
@ -243,10 +264,9 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
def get_social_metadata(mi, verbose=0):
from calibre.customize.ui import metadata_sources
fetchers = list(metadata_sources(metadata_type='social'))
for fetcher in fetchers: with MetadataSources(fetchers) as manager:
fetcher(mi.title, mi.authors, mi.publisher, mi.isbn, verbose) manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
for fetcher in fetchers: manager.join()
fetcher.join()
ratings, tags, comments = [], set([]), set([])
for fetcher in fetchers:
if fetcher.results:

View File

@ -70,6 +70,17 @@ def is_recipe(filename):
filename.rpartition('.')[0].endswith('_recipe_out')
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
pos = 0
if hasattr(stream, 'tell'):
pos = stream.tell()
try:
return _get_metadata(stream, stream_type, use_libprs_metadata)
finally:
if hasattr(stream, 'seek'):
stream.seek(pos)
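The wrapper's only job is to leave the caller's stream position untouched; the same tell/try/finally/seek idiom in miniature:

    from cStringIO import StringIO

    def peek4(stream):
        pos = stream.tell() if hasattr(stream, 'tell') else 0
        try:
            return stream.read(4)
        finally:
            if hasattr(stream, 'seek'):
                stream.seek(pos)

    s = StringIO('MOBIDATA')
    s.read(2)        # caller is mid-stream
    print peek4(s)   # 'BIDA'
    print s.read()   # 'BIDATA' -- position was restored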
def _get_metadata(stream, stream_type, use_libprs_metadata):
if stream_type: stream_type = stream_type.lower()
if stream_type in ('html', 'htm', 'xhtml', 'xhtm', 'xml'):
stream_type = 'html'

View File

@ -97,9 +97,14 @@ class MetadataUpdater(object):
self.nrecs, = unpack('>H', data[76:78])
record0 = self.record0 = self.record(0)
mobi_header_length, = unpack('>I', record0[0x14:0x18])
if not mobi_header_length:
raise MobiError("Non-standard file format. Try 'Convert E-Books' with MOBI as Input and Output formats.")
self.encryption_type, = unpack('>H', record0[12:14])
codepage, = unpack('>I', record0[28:32])
self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
image_base, = unpack('>I', record0[108:112])
flags, = self.flags, = unpack('>I', record0[128:132])
have_exth = self.have_exth = (flags & 0x40) != 0
@ -306,9 +311,10 @@ class MetadataUpdater(object):
return StreamSlicer(self.stream, start, stop)
def update(self, mi): def update(self, mi):
def pop_exth_record(exth_id): def update_exth_record(rec):
if exth_id in self.original_exth_records: recs.append(rec)
self.original_exth_records.pop(exth_id) if rec[0] in self.original_exth_records:
self.original_exth_records.pop(rec[0])
if self.type != "BOOKMOBI": if self.type != "BOOKMOBI":
raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n" raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"
@@ -323,47 +329,36 @@ class MetadataUpdater(object):
             pas = False
         if mi.author_sort and pas:
             authors = mi.author_sort
-            recs.append((100, authors.encode(self.codec, 'replace')))
-            pop_exth_record(100)
+            update_exth_record((100, authors.encode(self.codec, 'replace')))
         elif mi.authors:
             authors = '; '.join(mi.authors)
-            recs.append((100, authors.encode(self.codec, 'replace')))
-            pop_exth_record(100)
+            update_exth_record((100, authors.encode(self.codec, 'replace')))
         if mi.publisher:
-            recs.append((101, mi.publisher.encode(self.codec, 'replace')))
-            pop_exth_record(101)
+            update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
         if mi.comments:
-            recs.append((103, mi.comments.encode(self.codec, 'replace')))
-            pop_exth_record(103)
+            update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
         if mi.isbn:
-            recs.append((104, mi.isbn.encode(self.codec, 'replace')))
-            pop_exth_record(104)
+            update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
         if mi.tags:
             subjects = '; '.join(mi.tags)
-            recs.append((105, subjects.encode(self.codec, 'replace')))
-            pop_exth_record(105)
+            update_exth_record((105, subjects.encode(self.codec, 'replace')))
         if mi.pubdate:
-            recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
-            pop_exth_record(106)
+            update_exth_record((106, str(mi.pubdate).encode(self.codec, 'replace')))
         elif mi.timestamp:
-            recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
-            pop_exth_record(106)
+            update_exth_record((106, str(mi.timestamp).encode(self.codec, 'replace')))
         elif self.timestamp:
-            recs.append((106, self.timestamp))
-            pop_exth_record(106)
+            update_exth_record((106, self.timestamp))
         else:
-            recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
-            pop_exth_record(106)
+            update_exth_record((106, nowf().isoformat().encode(self.codec, 'replace')))
         if self.cover_record is not None:
-            recs.append((201, pack('>I', self.cover_rindex)))
-            recs.append((203, pack('>I', 0)))
-            pop_exth_record(201)
-            pop_exth_record(203)
+            update_exth_record((201, pack('>I', self.cover_rindex)))
+            update_exth_record((203, pack('>I', 0)))
         if self.thumbnail_record is not None:
-            recs.append((202, pack('>I', self.thumbnail_rindex)))
-            pop_exth_record(202)
+            update_exth_record((202, pack('>I', self.thumbnail_rindex)))
+        if 503 in self.original_exth_records:
+            update_exth_record((503, mi.title.encode(self.codec, 'replace')))

-        # Restore any original EXTH fields that weren't updated
+        # Include remaining original EXTH fields
         for id in sorted(self.original_exth_records):
             recs.append((id, self.original_exth_records[id]))
         recs = sorted(recs, key=lambda x:(x[0],x[0]))
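
The refactor above folds each append/pop pair into one helper, so an updated EXTH id can never be emitted twice by the final merge loop. The shape of that pattern in an invented, runnable miniature:

    def make_updater(recs, original):
        def update_exth_record(rec):
            recs.append(rec)                # write the new value...
            original.pop(rec[0], None)      # ...and retire the old one
        return update_exth_record

    recs, original = [], {100: b'Old Author', 503: b'Old Title', 524: b'en'}
    update = make_updater(recs, original)
    update((100, b'New Author'))
    update((503, b'New Title'))
    recs.extend(sorted(original.items()))   # untouched ids (here 524) survive
    assert sorted(r[0] for r in recs) == [100, 503, 524]
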


@@ -779,6 +779,9 @@ class OPF(object):
             self.set_text(matches[0], unicode(val))
         return property(fget=fget, fset=fset)

+    def identifier_iter(self):
+        for item in self.identifier_path(self.metadata):
+            yield item

     def guess_cover(self):
         '''


@@ -8,9 +8,10 @@ Read metadata from RAR archives
 '''
 import os
-from cStringIO import StringIO

-from calibre.ptempfile import PersistentTemporaryFile
+from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
 from calibre.libunrar import extract_member, names
+from calibre import CurrentDir

 def get_metadata(stream):
     from calibre.ebooks.metadata.archive import is_comic

@@ -32,8 +33,10 @@ def get_metadata(stream):
         stream_type = stream_type[1:]
         if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                            'rb', 'imp', 'pdf', 'lrf'):
-            data = extract_member(path, match=None, name=f)[1]
-            stream = StringIO(data)
+            with TemporaryDirectory() as tdir:
+                with CurrentDir(tdir):
+                    stream = extract_member(path, match=None, name=f,
+                            as_file=True)[1]
             return get_metadata(stream, stream_type)
     raise ValueError('No ebook found in RAR archive')
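
Both archive readers touched by this commit (RAR here, ZIP below) move from an in-memory StringIO to extracting the member into a scratch directory and handing the parser a real file. A stdlib-only sketch of the pattern under Python 3, with zipfile standing in for libunrar and current_dir as a rough stand-in for calibre's CurrentDir:

    import os, tempfile, zipfile
    from contextlib import contextmanager

    @contextmanager
    def current_dir(path):
        prev = os.getcwd()
        os.chdir(path)
        try:
            yield path
        finally:
            os.chdir(prev)

    def member_metadata(archive_path, member, read_metadata):
        with zipfile.ZipFile(archive_path) as zf:
            with tempfile.TemporaryDirectory() as tdir, current_dir(tdir):
                path = zf.extract(member)       # lands in the scratch dir
                with open(path, 'rb') as f:
                    return read_metadata(f)     # parse while the file exists
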


@@ -149,7 +149,8 @@ class TOC(list):

     def read_ncx_toc(self, toc):
         self.base_path = os.path.dirname(toc)
-        soup = NCXSoup(xml_to_unicode(open(toc, 'rb').read())[0])
+        raw = xml_to_unicode(open(toc, 'rb').read(), assume_utf8=True)[0]
+        soup = NCXSoup(raw)

         def process_navpoint(np, dest):
             play_order = np.get('playOrder', None)

@@ -160,7 +161,7 @@ class TOC(list):
             if nl is not None:
                 text = u''
                 for txt in nl.findAll(re.compile('text')):
-                    text += ''.join([unicode(s) for s in txt.findAll(text=True)])
+                    text += u''.join([unicode(s) for s in txt.findAll(text=True)])
                 content = np.find(re.compile('content'))
                 if content is None or not content.has_key('src') or not txt:
                     return
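
The assume_utf8 flag implements the changelog fix "if no encoding is declared and detection has less than 100% confidence, assume UTF-8". A deliberately simplified decode-with-fallback, not calibre's actual xml_to_unicode logic:

    def decode_ncx(raw, assume_utf8=True):
        # Trust an exact UTF-8 decode first; only then fall back to a
        # legacy 8-bit codec, which never fails but may produce mojibake.
        if assume_utf8:
            try:
                return raw.decode('utf-8')
            except UnicodeDecodeError:
                pass
        return raw.decode('cp1252', 'replace')
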


@@ -43,6 +43,8 @@ def read_metadata_(task, tdir, notification=lambda x,y:x):
     import_map = {}
     for format in formats:
         nfp = run_plugins_on_import(format)
+        if nfp is None:
+            nfp = format
         nfp = os.path.abspath(nfp)
         if isinstance(nfp, unicode):
             nfp.encode(filesystem_encoding)
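
The two added lines guard against an import plugin returning None to mean "file unchanged"; without them os.path.abspath(None) raises. The same guard as a free function, names invented for illustration:

    import os

    def normalized_path(fmt_path, run_plugins_on_import=lambda p: None):
        nfp = run_plugins_on_import(fmt_path)
        if nfp is None:          # plugin made no changes
            nfp = fmt_path
        return os.path.abspath(nfp)

    assert normalized_path('book.epub').endswith('book.epub')
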


@@ -3,9 +3,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

 import os
-from zipfile import ZipFile
-from cStringIO import StringIO
+
+from calibre.utils.zipfile import ZipFile
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir

 def get_metadata(stream):
     from calibre.ebooks.metadata.meta import get_metadata

@@ -23,8 +24,10 @@ def get_metadata(stream):
             stream_type = stream_type[1:]
             if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                                'rb', 'imp', 'pdf', 'lrf'):
-                stream = StringIO(zf.read(f))
-                return get_metadata(stream, stream_type)
+                with TemporaryDirectory() as tdir:
+                    with CurrentDir(tdir):
+                        path = zf.extract(f)
+                        return get_metadata(open(path, 'rb'), stream_type)
     raise ValueError('No ebook found in ZIP archive')


@@ -154,7 +154,7 @@ class MOBIOutput(OutputFormatPlugin):
             MobiWriter, PALMDOC, UNCOMPRESSED
         from calibre.ebooks.mobi.mobiml import MobiMLizer
         from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
-        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
+        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
         from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
         from calibre.customize.ui import plugin_for_input_format
         imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None

@@ -163,8 +163,11 @@ class MOBIOutput(OutputFormatPlugin):
         tocadder(oeb, opts)
         mangler = CaseMangler()
         mangler(oeb, opts)
-        rasterizer = SVGRasterizer()
-        rasterizer(oeb, opts)
+        try:
+            rasterizer = SVGRasterizer()
+            rasterizer(oeb, opts)
+        except Unavailable:
+            self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
         mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
         mobimlizer(oeb, opts)
         self.check_for_periodical()
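
Together with the Unavailable exception introduced in rasterize.py further down, this turns the hard Qt dependency into an optional one ("if the SVG rasterizer is not available continue anyway", per the changelog). The pattern reduced to a runnable toy; qt_is_usable is an invented stand-in for calibre's is_ok_to_use_qt():

    class Unavailable(Exception):
        pass

    def qt_is_usable():
        return False        # stand-in availability probe

    class Rasterizer(object):
        def __init__(self):
            if not qt_is_usable():
                raise Unavailable('Not OK to use Qt')
        def __call__(self, book):
            print('rasterizing', book)

    try:
        Rasterizer()('book')
    except Unavailable:
        print('SVG rasterizer unavailable, SVG will not be converted')
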


@@ -4,12 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''

-import functools
-import os
-import re
-import struct
-import textwrap
-import cStringIO
+import functools, shutil, os, re, struct, textwrap, cStringIO, sys

 try:
     from PIL import Image as PILImage

@@ -619,6 +614,16 @@ class MobiReader(object):
                     * opf.cover.split('/'))):
                 opf.cover = None

+        cover = opf.cover
+        if cover is not None:
+            cover = cover.replace('/', os.sep)
+            if os.path.exists(cover):
+                ncover = 'images'+os.sep+'calibre_cover.jpg'
+                if os.path.exists(ncover):
+                    os.remove(ncover)
+                shutil.copyfile(cover, ncover)
+                opf.cover = ncover.replace(os.sep, '/')
+
         manifest = [(htmlfile, 'application/xhtml+xml'),
             (os.path.abspath('styles.css'), 'text/css')]
         bp = os.path.dirname(htmlfile)

@@ -796,15 +801,22 @@ def get_metadata(stream):
     from calibre.utils.logging import Log
     log = Log()
     mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
-    try:
-        mh = MetadataHeader(stream, log)
-        if mh.exth is not None:
-            if mh.exth.mi is not None:
-                mi = mh.exth.mi
-        else:
+    mh = MetadataHeader(stream, log)
+    if mh.title and mh.title != _('Unknown'):
+        mi.title = mh.title
+
+    if mh.exth is not None:
+        if mh.exth.mi is not None:
+            mi = mh.exth.mi
+    else:
+        size = sys.maxint
+        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
+            pos = stream.tell()
+            stream.seek(0, 2)
+            size = stream.tell()
+            stream.seek(pos)
+        if size < 4*1024*1024:
             with TemporaryDirectory('_mobi_meta_reader') as tdir:
                 with CurrentDir(tdir):
                     mr = MobiReader(stream, log)

@@ -812,16 +824,18 @@ def get_metadata(stream):
                     mr.extract_content(tdir, parse_cache)
                     if mr.embedded_mi is not None:
                         mi = mr.embedded_mi
-        if hasattr(mh.exth, 'cover_offset'):
-            cover_index = mh.first_image_index + mh.exth.cover_offset
-            data = mh.section_data(int(cover_index))
-        else:
-            data = mh.section_data(mh.first_image_index)
-        buf = cStringIO.StringIO(data)
+    if hasattr(mh.exth, 'cover_offset'):
+        cover_index = mh.first_image_index + mh.exth.cover_offset
+        data = mh.section_data(int(cover_index))
+    else:
+        data = mh.section_data(mh.first_image_index)
+    buf = cStringIO.StringIO(data)
+    try:
         im = PILImage.open(buf)
-        obuf = cStringIO.StringIO()
-        im.convert('RGBA').save(obuf, format='JPEG')
-        mi.cover_data = ('jpg', obuf.getvalue())
     except:
-        log.exception()
+        log.exception('Failed to read MOBI cover')
+    else:
+        obuf = cStringIO.StringIO()
+        im.convert('RGB').save(obuf, format='JPEG')
+        mi.cover_data = ('jpg', obuf.getvalue())
     return mi
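
The new size probe keeps the expensive full-content walk reserved for books under 4 MB. As a standalone helper (sys.maxsize here; the Python 2 code above uses sys.maxint):

    import sys

    def stream_size(stream):
        size = sys.maxsize                  # treat unseekable streams as huge
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)               # jump to end of stream
            size = stream.tell()
            stream.seek(pos)                # restore the caller's position
        return size

get_metadata above only extracts the book's content when this measured size is below 4*1024*1024 bytes.
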


@@ -152,13 +152,17 @@ class EbookIterator(object):
                 prints('Substituting font family: %s -> %s'%(bad, good))
             return match.group().replace(bad, '"%s"'%good)

+        from calibre.ebooks.chardet import force_encoding
         for csspath in css_files:
             with open(csspath, 'r+b') as f:
                 css = f.read()
-                css = font_family_pat.sub(prepend_embedded_font, css)
-                f.seek(0)
-                f.truncate()
-                f.write(css)
+                enc = force_encoding(css, False)
+                css = css.decode(enc, 'replace')
+                ncss = font_family_pat.sub(prepend_embedded_font, css)
+                if ncss != css:
+                    f.seek(0)
+                    f.truncate()
+                    f.write(ncss.encode(enc))

     def __enter__(self, processed=False):
         self.delete_on_exit = []

@@ -173,11 +177,12 @@ class EbookIterator(object):
         plumber.opts.no_process = True

         plumber.input_plugin.for_viewer = True
-        self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
-            plumber.opts, plumber.input_fmt, self.log,
-            {}, self.base)
+        with plumber.input_plugin:
+            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
+                plumber.opts, plumber.input_fmt, self.log,
+                {}, self.base)

-        if processed or plumber.input_fmt.lower() in ('pdf', 'rb') and \
+        if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
                 not hasattr(self.pathtoopf, 'manifest'):
             self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                     plumber.input_plugin)
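
This is the "handle non-ascii CSS files" fix from the changelog: decode before substituting, and only rewrite the file when the substitution changed something. A sketch of the idiom with a hypothetical detect_encoding() standing in for calibre's force_encoding():

    def rewrite_in_place(path, transform, detect_encoding=lambda raw: 'utf-8'):
        with open(path, 'r+b') as f:
            raw = f.read()
            enc = detect_encoding(raw)
            text = raw.decode(enc, 'replace')
            new = transform(text)
            if new != text:        # leave untouched files byte-identical
                f.seek(0)
                f.truncate()
                f.write(new.encode(enc))
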


@@ -331,7 +331,10 @@ class OEBReader(object):
             id = child.get('id')
             klass = child.get('class', 'chapter')

-            po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
+            try:
+                po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
+            except:
+                po = self.oeb.toc.next_play_order()

             authorElement = xpath(child,
                     'descendant::calibre:meta[@name = "author"]')
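
This matches the changelog's "ignore non-integral play orders when parsing NCX files": a malformed playOrder no longer aborts the read, it just falls back to the next sequential value. The same logic as a helper (with the bare except narrowed slightly for illustration):

    from itertools import count

    def parse_play_order(value, next_play_order):
        try:
            return int(value)
        except (TypeError, ValueError):     # missing or non-integral playOrder
            return next_play_order()

    counter = count(1)
    assert parse_play_order('7', lambda: next(counter)) == 7
    assert parse_play_order('vii', lambda: next(counter)) == 1
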


@@ -190,11 +190,11 @@ class Stylizer(object):
             selector = CSSSelector(ntext)
             matches = selector(tree)

-            if not matches and class_sel_pat.match(text):
+            if not matches and class_sel_pat.match(text) and text.lower() != text:
                 found = False
+                ltext = text.lower()
                 for x in tree.xpath('//*[@class]'):
-                    if text.lower().endswith('.'+x.get('class').lower()) and \
-                            text.lower() != text:
+                    if ltext.endswith('.'+x.get('class').lower()):
                         matches.append(x)
                         found = True
                 if found:
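
The rewrite hoists the loop-invariant text.lower() out of the loop and skips the fallback entirely when the selector is already lowercase (in which case CSSSelector had nothing left to find). The matching rule in isolation, over plain class-name strings rather than an lxml tree:

    def fallback_class_matches(selector, class_names):
        lsel = selector.lower()
        if lsel == selector:            # nothing case-insensitivity can add
            return []
        return [c for c in class_names if lsel.endswith('.' + c.lower())]

    assert fallback_class_matches('div.BoldText', ['boldtext', 'other']) == ['boldtext']
    assert fallback_class_matches('div.boldtext', ['boldtext']) == []
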


@@ -27,11 +27,14 @@ from calibre.ebooks.oeb.stylizer import Stylizer
 IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
 KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])

+class Unavailable(Exception):
+    pass
+
 class SVGRasterizer(object):
     def __init__(self):
         from calibre.gui2 import is_ok_to_use_qt
         if not is_ok_to_use_qt():
-            raise Exception('Not OK to use Qt')
+            raise Unavailable('Not OK to use Qt')

     @classmethod
     def config(cls, cfg):


@@ -29,7 +29,7 @@ class RescaleImages(object):
         page_width, page_height = self.opts.dest.width, self.opts.dest.height

-        if not self.opts.is_image_collection:
+        if not getattr(self.opts, 'is_image_collection', False):
             page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
             page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.

         for item in self.oeb.manifest:


@@ -11,12 +11,14 @@ class PDBError(Exception):
 from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
+from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader

 FORMAT_READERS = {
     'PNPdPPrs': ereader_reader,
     'PNRdPPrs': ereader_reader,
     'zTXTGPlm': ztxt_reader,
     'TEXtREAd': palmdoc_reader,
+    '.pdfADBE': pdf_reader,
 }

 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer

@@ -34,8 +36,8 @@ IDENTITY_TO_NAME = {
     'PNRdPPrs': 'eReader',
     'zTXTGPlm': 'zTXT',
     'TEXtREAd': 'PalmDOC',
-
     '.pdfADBE': 'Adobe Reader',
+
     'BVokBDIC': 'BDicty',
     'DB99DBOS': 'DB (Database program)',
     'vIMGView': 'FireViewer (ImageViewer)',
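
For context, the 8-byte keys in FORMAT_READERS are the PalmDB type+creator field, stored at bytes 60-68 of the PDB header. A sketch of the dispatch (helper name invented; the reader table is whatever mapping you pass in):

    def reader_for(pdb_header_bytes, format_readers):
        ident = pdb_header_bytes[60:68].decode('latin-1')
        try:
            return format_readers[ident]
        except KeyError:
            raise ValueError('Unknown PDB type/creator: %r' % ident)
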


@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from palmdoc pdb file.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ptempfile import TemporaryFile
+
+class Reader(FormatReader):
+
+    def __init__(self, header, stream, log, options):
+        self.header = header
+        self.stream = stream
+        self.log = log
+        self.options = options
+
+        setattr(self.options, 'new_pdf_engine', False)
+        setattr(self.options, 'no_images', False)
+        setattr(self.options, 'unwrap_factor', 0.5)
+
+    def extract_content(self, output_dir):
+        self.log.info('Extracting PDF...')
+
+        with TemporaryFile() as pdf_n:
+            pdf = open(pdf_n, 'rwb')
+            for x in xrange(self.header.section_count()):
+                pdf.write(self.header.section_data(x))
+
+            from calibre.customize.ui import plugin_for_input_format
+            pdf.seek(0)
+            return plugin_for_input_format('pdf').convert(pdf, self.options,
+                'pdf', self.log, [])
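
One caveat worth flagging: 'rwb' is not a standard open() mode, so the open() call above most likely intends read/write binary, i.e. 'w+b'. A sketch of the same assemble-then-check step under that assumption, using the header's section_count()/section_data() interface from the class above:

    def assemble_pdf(header, out_path):
        # Concatenate the PDB record payloads back into a single PDF stream.
        with open(out_path, 'w+b') as pdf:
            for i in range(header.section_count()):
                pdf.write(header.section_data(i))
            pdf.seek(0)
            return pdf.read(4) == b'%PDF'   # cheap sanity check of the magic
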


@@ -13,7 +13,7 @@ from functools import partial
 from calibre.ebooks import ConversionError, DRMError
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre import isosx, iswindows, islinux
+from calibre import isosx, iswindows, islinux, isfreebsd
 from calibre import CurrentDir

 PDFTOHTML = 'pdftohtml'

@@ -23,7 +23,7 @@ if isosx and hasattr(sys, 'frameworks_dir'):
 if iswindows and hasattr(sys, 'frozen'):
     PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
     popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
-if islinux and getattr(sys, 'frozen_path', False):
+if (islinux or isfreebsd) and getattr(sys, 'frozen_path', False):
     PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')

 def pdftohtml(output_dir, pdf_path, no_images):


@@ -72,14 +72,14 @@ class PML_HTMLizer(object):
         'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
         'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
         'r': ('<div style="text-align: right;">', '</div>'),
-        't': ('<div style="margin-left: 5%;">', '</div>'),
-        'T': ('<div style="margin-left: %s;">', '</div>'),
+        't': ('<div style="text-indent: 5%;">', '</div>'),
+        'T': ('<div style="text-indent: %s;">', '</div>'),
         'i': ('<span style="font-style: italic;">', '</span>'),
         'u': ('<span style="text-decoration: underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
         'b': ('<span style="font-weight: bold;">', '</span>'),
         'l': ('<span style="font-size: 150%;">', '</span>'),
-        'k': ('<span style="font-size: 75%;">', '</span>'),
+        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
         'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
         'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
     }

@@ -154,6 +154,11 @@ class PML_HTMLizer(object):
         self.file_name = ''

     def prepare_pml(self, pml):
+        # Give Chapters the form \\*='text'text\\*. This is used for generating
+        # the TOC later.
+        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+
         # Remove comments
         pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)

@@ -163,7 +168,7 @@ class PML_HTMLizer(object):
         pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
         pml = re.sub(r'(?mus)^[ ]*$', '', pml)

-        # Footnotes and Sidebars
+        # Footnotes and Sidebars.
         pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
         pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)

@@ -171,9 +176,7 @@ class PML_HTMLizer(object):
         # &. It will display as &amp;
         pml = pml.replace('&', '&amp;')

-        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
-        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+        # Replace \\a and \\U with either the unicode character or the entity.
         pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
         pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)

@@ -536,6 +539,7 @@ class PML_HTMLizer(object):
                 elif '%s%s' % (c, l) == 'Sd':
                     text = self.process_code('Sd', line, 'sb')
                 elif c in 'xXC':
+                    empty = False
                     # The PML was modified eariler so x and X put the text
                     # inside of ="" so we don't have do special processing
                     # for C.

@@ -578,10 +582,7 @@ class PML_HTMLizer(object):
                 else:
                     if c != ' ':
                         empty = False
-                    if self.state['k'][0]:
-                        text = c.upper()
-                    else:
-                        text = c
+                    text = c
                 parsed.append(text)
                 c = line.read(1)
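
The mapping changes above are presentational: t/T switch to text-indent, and k gains small-caps in CSS so the parser no longer has to upper-case characters by hand (which is exactly what the last hunk deletes). How such an (open, close) tag table is applied, in an invented miniature:

    PML_TAGS = {
        'i': ('<span style="font-style: italic;">', '</span>'),
        'T': ('<div style="text-indent: %s;">', '</div>'),
        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
    }

    def wrap(code, text, arg=None):
        open_tag, close_tag = PML_TAGS[code]
        if '%s' in open_tag:
            open_tag = open_tag % arg       # parameterized codes like \T="10%"
        return open_tag + text + close_tag

    print(wrap('k', 'small caps now come from CSS'))
    print(wrap('T', 'indented', '10%'))
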


@@ -131,7 +131,7 @@ class PMLMLizer(object):
             if item.href in self.link_hrefs.keys():
                 toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title))
             else:
-                self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
+                self.oeb_book.warn('Ignoring toc item: %s not found in document.' % item)
         return ''.join(toc)

     def get_text(self):


@@ -131,9 +131,9 @@ class RtfTokenParser():
             if isString(self.tokens[i].name, "\\'"):
                 i = i + 1
                 if not isinstance(self.tokens[i], tokenData):
-                    raise BaseException('Error: token8bitChar without data.')
+                    raise Exception('Error: token8bitChar without data.')
                 if len(self.tokens[i].data) < 2:
-                    raise BaseException('Error: token8bitChar without data.')
+                    raise Exception('Error: token8bitChar without data.')
                 newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
                 if len(self.tokens[i].data) > 2:
                     newTokens.append(tokenData(self.tokens[i].data[2:]))

@@ -195,7 +195,7 @@ class RtfTokenParser():
                     i = i + 1
                     j = j + 1
                     continue
-                raise BaseException('Error: incorect utf replacement.')
+                raise Exception('Error: incorect utf replacement.')

             #calibre rtf2xml does not support utfreplace
             replace = []

@@ -248,7 +248,7 @@ class RtfTokenizer():
             if isChar(self.rtfData[i], '\\'):
                 if i + 1 >= len(self.rtfData):
-                    raise BaseException('Error: Control character found at the end of the document.')
+                    raise Exception('Error: Control character found at the end of the document.')

                 if lastDataStart > -1:
                     self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))

@@ -269,7 +269,7 @@ class RtfTokenizer():
                     i = i + 1

                 if not consumed:
-                    raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
+                    raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))

                 #we have numeric argument before delimiter
                 if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):

@@ -283,10 +283,10 @@ class RtfTokenizer():
                         l = l + 1
                         i = i + 1
                         if l > 10 :
-                            raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
+                            raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])

                     if not consumed:
-                        raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
+                        raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])

                     separator = ''
                     if isChar(self.rtfData[i], ' '):
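
The motivation for the blanket BaseException → Exception change: BaseException is the root class that also covers KeyboardInterrupt and SystemExit, so generic "except Exception" handlers upstream never saw these parser errors. A small runnable demonstration:

    def tokenize(broken=True):
        exc_type = BaseException if broken else Exception
        raise exc_type('Error: token8bitChar without data.')

    for broken in (True, False):
        try:
            tokenize(broken)
        except Exception:
            print('caught by a normal handler')       # only the Exception case
        except BaseException:
            print('needs a dedicated BaseException handler')
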
