Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-07 10:14:46 -04:00)

Commit 07e888f764: merging with trunk

145 Changelog.yaml
@@ -4,6 +4,151 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.

- version: 0.6.44
  date: 2010-03-05

  new features:
    - title: "Experimental support for conversion of CHM files"
      type: major
      description: >
        "Conversion and reading of metadata from CHM files is now supported. This feature is
        still experimental, with more testing needed. Building from source on linux now
        requires chmlib."

    - title: "Experimental support for fetching annotations from the Kindle"
      type: major
      description: >
        "calibre can now fetch annotations from your Kindle and put them into the
        comments field. To fetch annotations, click the arrow next to the
        'Send to device' button and select 'Fetch Annotations', with your Kindle
        connected."
      # (an illustrative sketch follows this list)

    - title: "Support FreeBSD out of the box (except USB)"
      type: major
      tickets: [4715]

    - title: "News download scheduler: Don't try to download news when no active internet connection is present (linux/windows only)"

    - title: "EPUB to EPUB conversion: Preserve font encryption"

    - title: "calibre-server: Add --pidfile and --daemonize (unix only) options"

    - title: "Plugins: When loading a plugin zip file that contains binary code (pyd/dll/so/dylib), extract it to a temporary directory and add that directory to sys.path, instead of just adding the zip file to the path, as Python cannot load compiled code from a zip file"
      # (an illustrative sketch follows this list)
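Two of the features above lend themselves to short illustrations. First, annotation fetching: a Kindle stores highlights and notes in "documents/My Clippings.txt", with entries separated by a line of ten '=' characters. A minimal parsing sketch under that assumption (illustrative only, not calibre's actual code):

import io

def parse_my_clippings(path):
    # The Kindle writes this file as UTF-8 with a BOM; be tolerant of damage.
    with io.open(path, encoding='utf-8-sig', errors='replace') as f:
        raw = f.read()
    entries = []
    for block in raw.split('=' * 10):
        lines = [l.strip() for l in block.strip().splitlines() if l.strip()]
        if len(lines) >= 2:
            # line 0: title/author, line 1: type, location and date,
            # remaining lines: the highlighted or noted text itself
            entries.append({'title': lines[0], 'meta': lines[1],
                            'text': '\n'.join(lines[2:])})
    return entries

Second, the plugin-loading change: Python's zipimport can import pure-Python modules straight from a zip, but compiled extensions must exist on disk. A sketch of the described behaviour, with illustrative names only:

import sys, tempfile, zipfile

BINARY_EXTS = ('.pyd', '.dll', '.so', '.dylib')

def add_plugin_to_path(zip_path):
    with zipfile.ZipFile(zip_path) as zf:
        if any(n.lower().endswith(BINARY_EXTS) for n in zf.namelist()):
            tdir = tempfile.mkdtemp(prefix='plugin-')
            zf.extractall(tdir)           # compiled code must live on disk
            sys.path.insert(0, tdir)
        else:
            sys.path.insert(0, zip_path)  # zipimport handles pure Python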
  bug fixes:
    - title: "Ebook-viewer: Handle non-ASCII CSS files when doing font substitutions"

    - title: "Conversion pipeline: Ignore non-integral play orders when parsing NCX files"

    - title: "When decoding NCX ToC files, if no encoding is declared and detection has less than 100% confidence, assume UTF-8."
      tickets: [5039]
      # (an illustrative sketch follows this list)

    - title: "PML chapter definitions missing from toc.ncx"
      tickets: [4990]

    - title: "Unicode string for cover causes calibredb --output-format stanza to fail"
      tickets: [5035]

    - title: "Search cover:False fails, cover:True succeeds"
      tickets: [5034]

    - title: "Plugins: Correctly use context"

    - title: "MOBI Input: Don't lose the cover if it is also referred to in the main text"
      tickets: [5020]

    - title: "RTF Output: Don't choke on PNG images"
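A sketch of the NCX decoding policy above, assuming a chardet-style detector (calibre wraps its own detection helpers; the names here are illustrative):

import codecs
import chardet

def decode_ncx(raw):
    # An explicit BOM or a declared XML encoding would be honoured first
    # (that check is elided here).
    if raw.startswith(codecs.BOM_UTF8):
        return raw.decode('utf-8-sig')
    guess = chardet.detect(raw)
    if guess['encoding'] and guess['confidence'] >= 1.0:
        return raw.decode(guess['encoding'], 'replace')
    # No declared encoding and detection is uncertain: assume UTF-8.
    return raw.decode('utf-8', 'replace')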
  new recipes:
    - title: Journal of Hospital Medicine, San Francisco Bay Guardian, Smithsonian Magazine
      author: Krittika Goyal

    - title: Astronomy Picture of the Day, Epicurious
      author: Starson17

    - title: Diario Vasco, Various Chilean newspapers
      author: Darko Miletic

    - title: Kukuburi
      author: Mori

  improved recipes:
    - Ars Technica
    - Fudzilla
    - The Atlantic
    - The Economist
    - Huffington Post
- version: 0.6.43
  date: 2010-02-26

  new features:
    - title: "Support for the Teclast K3 and Elonex e-book readers"

    - title: "Add 'Recently Read' category to catalog if Kindle is connected when catalog is generated"

    - title: "When adding PRC/MOBI files that are actually Topaz files, change detected file type to Topaz"
      # (an illustrative sketch follows this list)

    - title: "MOBI Output: If the SVG rasterizer is not available, continue anyway"

    - title: "News download: When using the debug pipeline options, create a zip file named periodical.downloaded_recipe in the debug directory. This can be passed to ebook-convert to directly convert a previous download into an e-book."

    - title: "Add Apply button to catalog generation dialog"
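A sketch of the kind of signature check the Topaz detection implies, assuming Topaz files begin with the 'TPZ' magic bytes (illustrative, not calibre's actual code):

def is_topaz(path):
    with open(path, 'rb') as f:
        return f.read(3) == b'TPZ'  # assumed Topaz magic number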
  bug fixes:
    - title: "When fetching metadata in the edit metadata dialog, use a Python thread instead of a Qt thread. Hopefully this will fix the reports of crashes when fetching metadata"

    - title: "Refresh cover browser when a cover is changed via the edit meta information dialog"

    - title: "More device detection debug output on OS X"

    - title: "Download only covers should not also set social metadata"
      tickets: [4966]

    - title: "Content server: If binding to 0.0.0.0 fails, try detecting and binding only to the interface used for outgoing traffic"
      # (an illustrative sketch follows this list)

    - title: "Handle poorly designed import plugins that return None on error"

    - title: "Move logic for removing inline navbars out of the BasicNewsRecipe class"

    - title: "MOBI metadata: When setting the title, set it in both the PalmDoc and EXTH headers"

    - title: "MOBI metadata: Do not try to extract embedded metadata from MOBI files larger than 4MB"

    - title: "Handle PDB files that contain PDF files"
      tickets: [4971]

    - title: "PML Input: Various fixes"
      tickets: [4959,4961]

    - title: "Fix reading MOBI metadata from files in zip/rar archives"

    - title: "Make extracting single files from RAR archives more efficient"

    - title: "No longer need Qt to generate the default cover for news downloads"

    - title: "Catalog generation: Fix EPUB anchors beginning with numbers in Recently Added"

    - title: "Searching: Handle uppercase keywords correctly"
      tickets: [4951]
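A sketch of the content-server fallback above: when binding to 0.0.0.0 fails, the interface used for outgoing traffic can be found by connecting a UDP socket to a public address and reading the local end of that socket. The function name and probe address are illustrative:

import socket

def pick_bind_address(port):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.bind(('0.0.0.0', port))
        return '0.0.0.0'          # the wildcard bind works; use it
    except socket.error:
        pass
    finally:
        s.close()
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        # connect() on a UDP socket sends no packets; it only selects
        # the local interface that would route to this address.
        probe.connect(('8.8.8.8', 53))
        return probe.getsockname()[0]
    finally:
        probe.close()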
  new recipes:
    - title: Gamasutra
      author: Darko Miletic

  improved recipes:
    - "Strategy+Business"
    - Arizona Daily Star
    - Heise
    - New Scientist
    - Various Serbian news feeds
    - Houston and San Francisco Chronicles

- version: 0.6.42
  date: 2010-02-20
@@ -79,9 +79,24 @@ p.unread_book {
    text-indent:-2em;
}

p.date_read {
    text-align:left;
    margin-top:0px;
    margin-bottom:0px;
    margin-left:6em;
    text-indent:-6em;
}

hr.series_divider {
    width:50%;
    margin-left:1em;
    margin-top:0em;
    margin-bottom:0em;
}

hr.annotations_divider {
    width:50%;
    margin-left:1em;
    margin-top:0em;
    margin-bottom:0em;
}
Binary file not shown. (Before: 116 KiB, After: 124 KiB)

BIN resources/images/news/diariovasco.png (new file, 766 B; binary file not shown)
BIN resources/images/news/gamasutra_fa.png (new file, 956 B; binary file not shown)
BIN resources/images/news/gamasutra_news.png (new file, 956 B; binary file not shown)
@@ -1,7 +1,6 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

'''
24sata.rs
@@ -9,7 +8,6 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Ser24Sata(BasicNewsRecipe):
    title = '24 Sata - Sr'
@@ -17,22 +15,20 @@ class Ser24Sata(BasicNewsRecipe):
    description = '24 sata portal vesti iz Srbije'
    publisher = 'Ringier d.o.o.'
    category = 'news, politics, entertainment, Serbia'
    oldest_article = 7
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'sr'

    lang = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : lang
                        , 'pretty_print'     : True
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -40,25 +36,6 @@ class Ser24Sata(BasicNewsRecipe):
    feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang

        attribs = [ 'style','font','valign'
                   ,'colspan','width','height'
                   ,'rowspan','summary','align'
                   ,'cellspacing','cellpadding'
                   ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]

        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

    def print_version(self, url):
37 resources/recipes/apod.recipe (new file)
@@ -0,0 +1,37 @@
from calibre.web.feeds.news import BasicNewsRecipe

class APOD(BasicNewsRecipe):
    title = u'Astronomy Picture of the Day'
    __author__ = 'Starson17'
    description = 'Astronomy Pictures'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    cover_url = 'http://apod.nasa.gov/apod/image/1003/m78_torregrosa.jpg'
    remove_javascript = True
    recursions = 0
    oldest_article = 14

    feeds = [
        (u'Astronomy Picture of the Day', u'http://apod.nasa.gov/apod.rss')
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def postprocess_html(self, soup, first_fetch):
        # Drop the trailing navigation <center> block and the first and
        # last two paragraphs, which carry boilerplate rather than content.
        center_tags = soup.findAll(['center'])
        if center_tags:
            center_tags[-1].extract()
        p_tags = soup.findAll(['p'])
        for tag in p_tags[:1] + p_tags[-2:]:
            tag.extract()
        return soup
@@ -5,6 +5,7 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
arstechnica.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

@@ -20,7 +21,7 @@ class ArsTechnica2(BasicNewsRecipe):
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    extra_css = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
    extra_css = ' body {font-family: Arial,Helvetica,sans-serif} .title{text-align: left} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '

    conversion_options = {
                          'comments' : description
@@ -30,6 +31,10 @@ class ArsTechnica2(BasicNewsRecipe):
                         }

    preprocess_regexps = [
        (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
       ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
    ]

    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]

@@ -37,7 +42,7 @@ class ArsTechnica2(BasicNewsRecipe):
        dict(name=['object','link','embed'])
       ,dict(name='div', attrs={'class':'read-more-link'})
    ]

    remove_attributes=['width','height']

    feeds = [
        (u'Infinite Loop (Apple content)' , u'http://feeds.arstechnica.com/arstechnica/apple/' )
@@ -90,3 +95,5 @@ class ArsTechnica2(BasicNewsRecipe):

        return soup

    def get_article_url(self, article):
        return article.get('guid', None).rpartition('?')[0]
@@ -5,76 +5,103 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
theatlantic.com
'''
import re
import string

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString

class TheAtlantic(BasicNewsRecipe):

    title = 'The Atlantic'
    __author__ = 'Kovid Goyal and Sujata Raman'
    description = 'Current affairs and politics focussed on the US'
    INDEX = 'http://www.theatlantic.com/doc/current'
    INDEX = 'http://www.theatlantic.com/magazine/toc/0/'
    language = 'en'

    remove_tags_before = dict(name='div', id='storytop')
    remove_tags = [
        dict(name='div', id=['seealso','storybottom', 'footer', 'ad_banner_top', 'sidebar','articletoolstop','subcontent',]),
        dict(name='p', attrs={'id':["pagination"]}),
        dict(name='table',attrs={'class':"tools"}),
        dict(name='style'),
        dict(name='a', href='/a/newsletters.mhtml')
    ]
    remove_attributes = ['icap', 'callout', 'style']
    remove_tags_before = dict(name='div', id='articleHead')
    remove_tags_after = dict(id='copyright')
    remove_tags = [dict(id=['header', 'printAds', 'pageControls'])]
    no_stylesheets = True
    conversion_options = { 'linearize_tables':True }

    extra_css = '''
        #timestamp{font-family:Arial,Helvetica,sans-serif; color:#666666 ;font-size:x-small}
        #storytype{font-family:Arial,Helvetica,sans-serif; color:#D52B1E ;font-weight:bold; font-size:x-small}
        h2{font-family:georgia,serif; font-style:italic;font-size:x-small;font-weight:normal;}
        h1{font-family:georgia,serif; font-weight:bold; font-size:large}
        #byline{font-family:georgia,serif; font-weight:bold; font-size:x-small}
        #topgraf{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
        .artsans{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    '''

    def print_version(self, url):
        return url.replace('/archive/', '/print/')

    def parse_index(self):
        articles = []

        soup = self.index_to_soup(self.INDEX)
        sectit = soup.find('h1', attrs={'class':'sectionTitle'})
        if sectit is not None:
            texts = sectit.findAll('cufontext')
            texts = map(self.tag_to_string, texts[-2:])
            self.timefmt = ' [%s]'%(''.join(texts))

        issue = soup.find('span', attrs={'class':'issue'})
        if issue:
            self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')

        cover = soup.find('img', alt=re.compile('Cover'), src=True)
        cover = soup.find('img', src=True, attrs={'class':'cover'})
        if cover is not None:
            self.cover_url = 'http://theatlantic.com'+cover['src']
            self.cover_url = cover['src']

        for item in soup.findAll('div', attrs={'class':'item'}):
            a = item.find('a')
            if a and a.has_key('href'):
        feeds = []
        for section in soup.findAll('div', attrs={'class':'magazineSection'}):
            section_title = section.find(attrs={'class':'sectionHeader'})
            section_title = string.capwords(self.tag_to_string(section_title))
            self.log('Found section:', section_title)
            articles = []
            for post in section.findAll('div', attrs={'class':'post'}):
                h = post.find(['h3', 'h4'])
                title = self.tag_to_string(h)
                a = post.find('a', href=True)
                url = a['href']
                if not url.startswith('http://'):
                    url = 'http://www.theatlantic.com/'+url
                url = url.replace('/doc/', '/doc/print/')
                title = self.tag_to_string(a)
                if title in ('VIDEO', 'AUDIO', 'INTERACTIVE MAP', 'SIDEBAR', 'RECIPES'):
                    continue
                title = title.replace('&', '&amp;')
                byline = item.find(attrs={'class':'byline'})
                date = self.tag_to_string(byline) if byline else ''
                description = ''
                if url.startswith('/'):
                    url = 'http://www.theatlantic.com'+url
                p = post.find('p', attrs={'class':'dek'})
                desc = None
                self.log('\tFound article:', title, 'at', url)
                if p is not None:
                    desc = self.tag_to_string(p)
                    self.log('\t\t', desc)
                articles.append({'title':title, 'url':url, 'description':desc,
                                 'date':''})
            feeds.append((section_title, articles))

            self.log('\tFound article:', title)
            self.log('\t\t', url)
        poems = []
        self.log('Found section: Poems')
        for poem in soup.findAll('div', attrs={'class':'poem'}):
            title = self.tag_to_string(poem.find('h4'))
            desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
            url = 'http://www.theatlantic.com'+poem.find('a')['href']
            self.log('\tFound article:', title, 'at', url)
            self.log('\t\t', desc)
            poems.append({'title':title, 'url':url, 'description':desc,
                          'date':''})
        if poems:
            feeds.append(('Poems', poems))

            articles.append({
                'title':title,
                'date':date,
                'url':url,
                'description':description
            })
        self.log('Found section: Advice')
        div = soup.find(id='advice')
        title = self.tag_to_string(div.find('h4'))
        url = 'http://www.theatlantic.com'+div.find('a')['href']
        desc = self.tag_to_string(div.find('p'))
        self.log('\tFound article:', title, 'at', url)
        self.log('\t\t', desc)

        feeds.append(('Advice', [{'title':title, 'url':url, 'description':desc,
                                  'date':''}]))
        return feeds

    def postprocess_html(self, soup, first):
        for table in soup.findAll('table', align='right'):
            img = table.find('img')
            if img is not None:
                img.extract()
                caption = self.tag_to_string(table).strip()
                div = Tag(soup, 'div')
                div['style'] = 'text-align:center'
                div.insert(0, img)
                div.insert(1, Tag(soup, 'br'))
                if caption:
                    div.insert(2, NavigableString(caption))
                table.replaceWith(div)

        return soup

        return [('Current Issue', articles)]
@@ -1,10 +1,10 @@

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.azstarnet.com
azstarnet.com
'''

import urllib
from calibre.web.feeds.news import BasicNewsRecipe

class Azstarnet(BasicNewsRecipe):
@@ -14,12 +14,12 @@ class Azstarnet(BasicNewsRecipe):
    language = 'en'
    publisher = 'azstarnet.com'
    category = 'news, politics, Arizona, USA'
    delay = 1
    oldest_article = 3
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://azstarnet.com/content/tncms/live/global/resources/images/logo.gif'
    needs_subscription = True

    conversion_options = {
@@ -32,31 +32,27 @@ class Azstarnet(BasicNewsRecipe):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://azstarnet.com/')
        if self.username is not None and self.password is not None:
            br.open('http://azstarnet.com/registration/retro.php')
            br.select_form(nr=1)
            br['email'] = self.username
            br['pass' ] = self.password
            br.submit()
            data = urllib.urlencode({ 'm':'login'
                                     ,'u':self.username
                                     ,'p':self.password
                                     ,'z':'http://azstarnet.com/'
                                    })
            br.open('http://azstarnet.com/app/registration/proxy.php',data)
        return br

    keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]

    remove_tags = [
        dict(name=['object','link','iframe','base','img'])
       ,dict(name='div',attrs={'class':'bannerinstory'})
    ]
    remove_tags = [dict(name=['object','link','iframe','base','img'])]

    feeds = [
        (u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')
       ,(u'Sports' , u'http://rss.azstarnet.com/index.php?site=sports')
       ,(u'Business' , u'http://rss.azstarnet.com/index.php?site=biz-topheadlines')
       ,(u'Nation-World' , u'http://rss.azstarnet.com/index.php?site=news')
       ,(u'Opinion' , u'http://rss.azstarnet.com/index.php?site=opinion')
       ,(u'Lifestyle' , u'http://rss.azstarnet.com/index.php?site=accent')
       ,(u'Food' , u'http://rss.azstarnet.com/index.php?site=food')
        (u'Local News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/local&l=25&s=start_time&sd=desc')
       ,(u'National News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/national&l=25&s=start_time&sd=desc')
       ,(u'World News' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/world&l=25&s=start_time&sd=desc')
       ,(u'Sports' , u'http://azstarnet.com/search/?f=rss&t=article&c=sports&l=25&s=start_time&sd=desc')
       ,(u'Opinion' , u'http://azstarnet.com/search/?f=rss&t=article&c=news/opinion&l=25&s=start_time&sd=desc')
       ,(u'Movies' , u'http://azstarnet.com/search/?f=rss&t=article&c=entertainment/movies&l=25&s=start_time&sd=desc')
       ,(u'Food' , u'http://azstarnet.com/search/?f=rss&t=article&c=lifestyles/food-and-cooking&l=25&s=start_time&sd=desc')
    ]

    def preprocess_html(self, soup):
@@ -64,4 +60,6 @@ class Azstarnet(BasicNewsRecipe):
            del item['style']
        return soup

    def print_version(self, url):
        return url + '?print=1'
@@ -1,7 +1,6 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
b92.net
'''
@@ -20,15 +19,14 @@ class B92(BasicNewsRecipe):
    use_embedded_content = False
    encoding = 'cp1250'
    language = 'sr'

    lang = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : lang
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -50,20 +48,5 @@ class B92(BasicNewsRecipe):
        return url + '&version=print'

    def preprocess_html(self, soup):
        del soup.body['onload']
        for item in soup.findAll('font'):
            item.name='div'
            if item.has_key('size'):
                del item['size']
        attribs = [ 'style','font','valign'
                   ,'colspan','width','height'
                   ,'rowspan','summary','align'
                   ,'cellspacing','cellpadding'
                   ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup
        return self.adeify_images(soup)
@@ -1,13 +1,11 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
beta.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Danas(BasicNewsRecipe):
    title = 'BETA'
@@ -20,17 +18,13 @@ class Danas(BasicNewsRecipe):
    no_stylesheets = False
    use_embedded_content = True
    language = 'sr'

    lang = 'sr-Latn-RS'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        , 'language'     : language
                        }

@@ -43,9 +37,4 @@ class Danas(BasicNewsRecipe):
    ]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)
@@ -14,14 +14,13 @@ class Blic(BasicNewsRecipe):
    description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
    publisher = 'RINGIER d.o.o.'
    category = 'news, politics, Serbia'
    delay = 1
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    masthead_url = 'http://www.blic.rs/resources/images/header/header_back.png'
    language = 'sr'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Georgia, serif1, serif} .article_description{font-family: Arial, sans1, sans-serif} .img_full{float: none} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment' : description
@@ -31,13 +30,15 @@ class Blic(BasicNewsRecipe):
                         }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags_before = dict(name='div', attrs={'id':'article_info'})
    remove_tags = [dict(name=['object','link'])]
    remove_attributes = ['width','height']

    feeds = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]

    remove_tags = [dict(name=['object','link'])]

    def print_version(self, url):
        return url + '/print'

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
36 resources/recipes/cetnixploitation.recipe (new file)
@@ -0,0 +1,36 @@

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
chetnixploitation.blogspot.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Chetnixploitation(BasicNewsRecipe):
    title = 'Chetnixploitation'
    __author__ = 'Darko Miletic'
    description = 'Filmski blog'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'sr'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '

    conversion_options = {
                          'comment'  : description
                        , 'tags'     : 'film, blog, cetnici, srbija, ex-yu'
                        , 'publisher': 'Son of Man'
                        , 'language' : language
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
    feeds = [(u'Posts', u'http://chetnixploitation.blogspot.com/feeds/posts/default')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -20,7 +20,7 @@ class Danas(BasicNewsRecipe):
    encoding = 'utf-8'
    masthead_url = 'http://www.danas.rs/images/basic/danas.gif'
    language = 'sr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; color:#666666; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif} .nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif} .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment' : description
50 resources/recipes/diariovasco.recipe (new file)
@@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.diariovasco.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DiarioVasco(BasicNewsRecipe):
    title = 'Diario Vasco'
    __author__ = 'Darko Miletic'
    description = 'Noticias de pais Vasco y el resto del mundo'
    publisher = 'Diario Vasco'
    category = 'news, politics, Spain'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    remove_empty_feeds = True
    masthead_url = 'http://www.diariovasco.com/img/rd.logotipo2_dvasco.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '

    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    keep_only_tags = [
        dict(attrs={'id':'title'})
       ,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
    ]
    remove_tags = [dict(name='ul')]
    remove_attributes = ['width','height']

    feeds = [
        (u'Ultimas Noticias' , u'http://www.diariovasco.com/rss/feeds/ultima.xml' )
       ,(u'Portada' , u'http://www.diariovasco.com/portada.xml' )
       ,(u'Politica' , u'http://www.diariovasco.com/rss/feeds/politica.xml' )
       ,(u'Deportes' , u'http://www.diariovasco.com/rss/feeds/deportes.xml' )
       ,(u'Economia' , u'http://www.diariovasco.com/rss/feeds/economia.xml' )
       ,(u'Mundo' , u'http://www.diariovasco.com/rss/feeds/mundo.xml' )
       ,(u'Cultura' , u'http://www.diariovasco.com/rss/feeds/cultura.xml' )
       ,(u'Gente' , u'http://www.diariovasco.com/rss/feeds/gente.xml' )
       ,(u'Contraportada' , u'http://www.diariovasco.com/rss/feeds/contraportada.xml')
    ]
@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

'''
e-novine.com
@@ -9,7 +7,6 @@ e-novine.com

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class E_novine(BasicNewsRecipe):
    title = 'E-Novine'
@@ -20,40 +17,38 @@ class E_novine(BasicNewsRecipe):
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1250'
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'sr'

    lang = 'sr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    masthead_url = 'http://www.e-novine.com/themes/e_novine/img/logo.gif'
    extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif} img{float: none; margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        , 'language'     : language
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
    keep_only_tags = [
        dict(name='div', attrs={'class':'article_head'})
       ,dict(name='div', attrs={'id':'article_body'})
    ]

    remove_tags = [dict(name=['object','link','embed','iframe'])]
    remove_tags = [
        dict(name=['object','link','embed','iframe'])
       ,dict(attrs={'id':'box_article_tools'})
    ]
    remove_attributes = ['height','width','lang']

    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
    feeds = [(u'Sve vesti', u'http://www.e-novine.com/feed/index.1.rss' )]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
        if ftag:
            it = ftag.div
            it.extract()
            ftag.div.extract()
            ftag.insert(0,it)
        return soup
        return self.adeify_images(soup)

    def print_version(self, url):
        return url + '?print'
@@ -119,6 +119,8 @@ class Economist(BasicNewsRecipe):
                ns = NavigableString(self.tag_to_string(caption))
                div.insert(0, ns)
                div.insert(1, Tag(soup, 'br'))
                del img['width']
                del img['height']
                img.extract()
                div.insert(2, img)
                table.replaceWith(div)

@@ -123,6 +123,8 @@ class Economist(BasicNewsRecipe):
                div.insert(0, ns)
                div.insert(1, Tag(soup, 'br'))
                img.extract()
                del img['width']
                del img['height']
                div.insert(2, img)
                table.replaceWith(div)
        return soup
58 resources/recipes/epicurious.recipe (new file)
@@ -0,0 +1,58 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
'''
www.epicurious.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe

class Epicurious(BasicNewsRecipe):
    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
                      dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
                     ]

    remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
                   {'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
                   dict(name='div', attrs={'class':['tagged','comments']})
                  ]

    remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]

    feeds = [
        (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
        (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
        (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
        (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
    ]

    match_regexps = [
        r'http://www.epicurious.com/.*recipes/.*/views'
    ]

    preprocess_regexps = [
        (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
        (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
        (re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
    ]

    def postprocess_html(self, soup, first_fetch):
        for t in soup.findAll(['table', 'tr', 'td']):
            t.name = 'div'
        return soup
@@ -1,27 +1,41 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010 Starson17'
'''
fudzilla.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Fudzilla(BasicNewsRecipe):
    title = u'Fudzilla'
    __author__ = 'Darko Miletic'
    __author__ = 'Starson17'
    language = 'en'

    description = 'Tech news'
    oldest_article = 7
    remove_javascript = True
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    feeds = [ (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')]

    def print_version(self, url):
        nurl = url.replace('http://www.fudzilla.com/index.php','http://www.fudzilla.com/index2.php')
        nmain, nsep, nrest = nurl.partition('&Itemid=')
        return nmain + '&pop=1&page=0&Itemid=1'
    remove_tags_before = dict(name='div', attrs={'class':['padding']})

    remove_tags = [dict(name='td', attrs={'class':['left','right']}),
                   dict(name='div', attrs={'id':['toolbar','buttons']}),
                   dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
                   dict(name='span', attrs={'class':['pathway']}),
                   dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
                   dict(name='table', attrs={'class':['headlines']}),
                  ]

    feeds = [
        (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
    ]

    preprocess_regexps = [
        (re.compile(r'<p class="MsoNormal"> Welcome.*</p> ', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]
56 resources/recipes/gamasutra_fa.recipe (new file)
@@ -0,0 +1,56 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gamasutra.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Gamasutra(BasicNewsRecipe):
    title = 'Gamasutra Featured articles'
    __author__ = 'Darko Miletic'
    description = 'The Art and Business of Making Games'
    publisher = 'Gamasutra'
    category = 'news, games, IT'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .title{font-size: x-large; font-weight: bold} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }
    preprocess_regexps = [
        (re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
       ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
       ,(re.compile(r'</head>', re.DOTALL|re.IGNORECASE),lambda match: '</head><body>')
    ]
    remove_tags = [
        dict(name=['object','embed','iframe'])
       ,dict(attrs={'class':'adBox'})
    ]
    remove_tags_before = dict(attrs={'class':'title'})
    remove_attributes = ['width','height','name']

    feeds = [(u'Feature Articles', u'http://feeds.feedburner.com/GamasutraFeatureArticles')]

    def print_version(self, url):
        return url + '?print=1'

    def get_article_url(self, article):
        return article.get('guid', None)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
45 resources/recipes/gamasutra_news.recipe (new file)
@@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
gamasutra.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Gamasutra(BasicNewsRecipe):
    title = 'Gamasutra News'
    __author__ = 'Darko Miletic'
    description = 'The Art and Business of Making Games'
    publisher = 'Gamasutra'
    category = 'news, games, IT'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    masthead_url = 'http://www.gamasutra.com/images/gamasutra_logo.gif'
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .newsTitle{font-size: xx-large; font-weight: bold} '

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }

    remove_tags = [dict(attrs={'class':['relatedNews','adBox']})]
    keep_only_tags = [dict(attrs={'class':['newsTitle','newsAuth','newsDate','newsText']})]
    remove_attributes = ['width','height']

    feeds = [(u'News', u'http://feeds.feedburner.com/GamasutraNews')]

    def get_article_url(self, article):
        return article.get('guid', None)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
@@ -1,7 +1,6 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

'''
glassrpske.com
@@ -9,7 +8,6 @@ glassrpske.com

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class GlasSrpske(BasicNewsRecipe):
    title = 'Glas Srpske'
@@ -22,20 +20,16 @@ class GlasSrpske(BasicNewsRecipe):
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    cover_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
    lang = 'sr-BA'
    masthead_url = 'http://www.glassrpske.com/var/slike/glassrpske-logo.png'
    language = 'sr'

    INDEX = 'http://www.glassrpske.com'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        , 'language'     : language
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -63,11 +57,7 @@ class GlasSrpske(BasicNewsRecipe):
    ]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.head.insert(0,mlang)
        return soup
        return self.adeify_images(soup)

    def parse_index(self):
        totalfeeds = []
@@ -1,7 +1,6 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.glas-javnosti.rs
'''
@@ -19,17 +18,13 @@ class GlasJavnosti(BasicNewsRecipe):
    no_stylesheets = False
    use_embedded_content = False
    language = 'sr'

    lang = 'sr-Latn-RS'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em} '

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        , 'language'     : language
                        }
@@ -13,8 +13,6 @@ class heiseDe(BasicNewsRecipe):
    title = 'heise'
    description = 'Computernews from Germany'
    __author__ = 'Oliver Niesner'
    language = 'de'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 40
@@ -35,12 +33,10 @@ class heiseDe(BasicNewsRecipe):
        dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}),
        dict(name='p', attrs={'class':'news_option'}),
        dict(name='p', attrs={'class':'news_navi'}),
        dict(name='p', attrs={'class':'news_foren'})]
    remove_tags_after = [dict(name='p', attrs={'class':'news_foren'})]
        dict(name='div', attrs={'class':'news_foren'})]
    remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})]

    feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ]
@@ -1,17 +1,41 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class HoustonChronicle(BasicNewsRecipe):

    title = u'The Houston Chronicle'
    description = 'News from Houston, Texas'
    __author__ = 'Kovid Goyal'
    __author__ = 'Kovid Goyal and Sujata Raman'
    language = 'en'
    timefmt = ' [%a, %d %b, %Y]'
    no_stylesheets = True

    keep_only_tags = [dict(id=['story-head', 'story'])]
    remove_tags = [dict(id=['share-module', 'resource-box',
                            'resource-box-header'])]
    keep_only_tags = [
        dict(id=['story-head', 'story'])
    ]

    remove_tags = [
        dict(id=['share-module', 'resource-box',
                 'resource-box-header'])
    ]

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
        #story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
        #story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
        #story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        #story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        #story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
        #Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
        .p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
        .p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
    '''

    def parse_index(self):
        soup = self.index_to_soup('http://www.chron.com/news/')
@@ -64,3 +88,6 @@ class HoustonChronicle(BasicNewsRecipe):
            feeds.append((current_section, current_articles))
        return feeds
@@ -3,7 +3,7 @@ import re

class HuffingtonPostRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    __author__ = 'kwetal and Archana Raman'
    language = 'en'
    version = 2

@@ -14,70 +14,89 @@ class HuffingtonPostRecipe(BasicNewsRecipe):

    oldest_article = 1.1
    max_articles_per_feed = 100
    use_embedded_content = True
    #use_embedded_content = True

    encoding = 'utf-8'
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # Feeds from: http://www.huffingtonpost.com/syndication/
    feeds = []
    feeds.append((u'Latest News', u'http://feeds.huffingtonpost.com/huffingtonpost/LatestNews'))

    #feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
    feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
    feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))
    feeds.append((u'Politics', u'http://www.huffingtonpost.com/feeds/verticals/politics/index.xml'))
    #feeds.append((u'Politics: News', u'http://www.huffingtonpost.com/feeds/verticals/politics/news.xml'))
    #feeds.append((u'Politics: Blog', u'http://www.huffingtonpost.com/feeds/verticals/politics/blog.xml'))

    #feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
    feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
    feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))
    feeds.append((u'Media', u'http://www.huffingtonpost.com/feeds/verticals/media/index.xml'))
    #feeds.append((u'Media: News', u'http://www.huffingtonpost.com/feeds/verticals/media/news.xml'))
    #feeds.append((u'Media: Blog', u'http://www.huffingtonpost.com/feeds/verticals/media/blog.xml'))

    #feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
    feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
    feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))
    feeds.append((u'Business', u'http://www.huffingtonpost.com/feeds/verticals/business/index.xml'))
    #feeds.append((u'Business: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
    #feeds.append((u'Business: Blogs', u'http://www.huffingtonpost.com/feeds/verticals/business/blog.xml'))

    #feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
    feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
    feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))
    feeds.append((u'Entertainment', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/index.xml'))
    #feeds.append((u'Entertainment: News', u'http://www.huffingtonpost.com/feeds/verticals/business/news.xml'))
    #feeds.append((u'Entertainment: Blog', u'http://www.huffingtonpost.com/feeds/verticals/entertainment/blog.xml'))

    #feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
    feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
    feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))
    feeds.append((u'Living', u'http://www.huffingtonpost.com/feeds/verticals/living/index.xml'))
    #feeds.append((u'Living: News', u'http://www.huffingtonpost.com/feeds/verticals/living/news.xml'))
    #feeds.append((u'Living: Blog', u'http://www.huffingtonpost.com/feeds/verticals/living/blog.xml'))

    #feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
    feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
    feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))
    feeds.append((u'Style', u'http://www.huffingtonpost.com/feeds/verticals/style/index.xml'))
    #feeds.append((u'Style: News', u'http://www.huffingtonpost.com/feeds/verticals/style/news.xml'))
    #feeds.append((u'Style: Blog', u'http://www.huffingtonpost.com/feeds/verticals/style/blog.xml'))

    #feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
    feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
    feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))
    feeds.append((u'Green', u'http://www.huffingtonpost.com/feeds/verticals/green/index.xml'))
    #feeds.append((u'Green: News', u'http://www.huffingtonpost.com/feeds/verticals/green/news.xml'))
    #feeds.append((u'Green: Blog', u'http://www.huffingtonpost.com/feeds/verticals/green/blog.xml'))

    #feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
    feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
    feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))
    feeds.append((u'Technology', u'http://www.huffingtonpost.com/feeds/verticals/technology/index.xml'))
    #feeds.append((u'Technology: News', u'http://www.huffingtonpost.com/feeds/verticals/technology/news.xml'))
    #feeds.append((u'Technology: Blog', u'http://www.huffingtonpost.com/feeds/verticals/technology/blog.xml'))

    #feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
    feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
    feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))
    feeds.append((u'Comedy', u'http://www.huffingtonpost.com/feeds/verticals/comedy/index.xml'))
    #feeds.append((u'Comedy: News', u'http://www.huffingtonpost.com/feeds/verticals/comedy/news.xml'))
    #feeds.append((u'Comedy: Blog', u'http://www.huffingtonpost.com/feeds/verticals/comedy/blog.xml'))

    #feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
    feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
    feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))
    feeds.append((u'World', u'http://www.huffingtonpost.com/feeds/verticals/world/index.xml'))
    #feeds.append((u'World: News', u'http://www.huffingtonpost.com/feeds/verticals/world/news.xml'))
    #feeds.append((u'World: Blog', u'http://www.huffingtonpost.com/feeds/verticals/world/blog.xml'))

    feeds.append((u'Original Reporting', u'http://www.huffingtonpost.com/tag/huffpolitics/feed'))
    feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))
    #feeds.append((u'Original Posts', u'http://www.huffingtonpost.com/feeds/original_posts/index.xml'))

    remove_tags = []
    remove_tags.append(dict(name='a', attrs={'href' : re.compile('http://feedads\.g\.doubleclick.net.*')}))
    remove_tags.append(dict(name='div', attrs={'class' : 'feedflare'}))
    remove_tags.append(dict(name='a', attrs={'class' : 'home_pixie'}))
    remove_tags.append(dict(name='div', attrs={'id' : ["top_nav",'threeup_top_wrapper','breaking_news_container',"hp_social_network"]}))
    remove_tags.append(dict(name='img', alt="Connect"))
    remove_tags.append(dict(name='div', attrs={'class' : ['logo']})) #'share_boxes_box_block_b_wraper',
|
||||
remove_tags.append(dict(name='div', attrs={'class' :[ 'read_more with_verticals','chicklets_box_outter_v05','blogger_menu_content','chicklets_bar']}))
|
||||
remove_tags.append(dict(name='div', attrs={'class' : ['sidebar_blog_first_design','sidebar_blog_second_design',]}))
|
||||
remove_tags.append(dict(name='div', attrs={'class' : ['main_big_news_ontop','login-menu','sidebar_blog_third_design','read_more']}))
|
||||
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class' : 'entry_content'}) ]
|
||||
# remove_attributes = ['style']
|
||||
|
||||
remove_attributes = ['style']
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
|
||||
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
|
||||
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
|
||||
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
|
||||
h2{font-size: x-large; font-weight: bold; padding: 0em; margin-bottom: 0.2em;}
|
||||
a[href]{color: blue; text-decoration: none; cursor: pointer;}
|
||||
#title_permalink{color:black;font-size:large;}
|
||||
.date{color:#858585;font-family:"Times New Roman",sans-serif;}
|
||||
.comments_datetime v05{color:#696969;}
|
||||
.teaser_permalink{font-style:italic;font-size:xx-small;}
|
||||
.blog_posted_date{color:#696969;font-size:xx-small;font-weight: bold;}
|
||||
'''
|
||||
|
||||
#a[href]{color: blue; text-decoration: none; cursor: pointer;}
|
||||
def get_article_url(self, article):
|
||||
"""
|
||||
Workaround for Feedparser behaviour. If an item has more than one <link/> element, article.link is empty and
|
||||
@ -85,10 +104,21 @@ class HuffingtonPostRecipe(BasicNewsRecipe):
|
||||
Todo: refactor to searching this list to avoid the hardcoded zero-index
|
||||
"""
|
||||
link = article.get('link')
|
||||
print("Link:"+link)
|
||||
if not link:
|
||||
links = article.get('links')
|
||||
if links:
|
||||
link = links[0]['href']
|
||||
if not links[0]['href']:
|
||||
link = links[1]['href']
|
||||
|
||||
return link
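
The Todo above asks for a search in place of the hardcoded zero index; a minimal sketch of that refactor, assuming feedparser's usual links structure (a list of dicts carrying 'href' keys) and not the recipe author's actual implementation:

    def get_article_url(self, article):
        # Prefer the flat 'link' attribute, then fall back to the first
        # entry in 'links' that actually carries a non-empty href.
        link = article.get('link')
        if not link:
            for candidate in article.get('links', []):
                if candidate.get('href'):
                    link = candidate['href']
                    break
        return link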

    def postprocess_html(self, soup, first_fetch):
        for tag in soup.findAll('div', text="What's Your Reaction?"):
            tag.extract()

        for tg in soup.findAll('blockquote'):
            tg.extract()

        return soup
87  resources/recipes/johm.recipe  Normal file
@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-

from calibre.web.feeds.recipes import BasicNewsRecipe

class JournalofHospitalMedicine(BasicNewsRecipe):

    title = 'Journal of Hospital Medicine'
    __author__ = 'Krittika Goyal'
    description = 'Medical news'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = True

    no_stylesheets = True
    #remove_tags_before = dict(name='div', attrs={'align':'center'})
    #remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':'subContent'}),
        dict(name='div', attrs={'id':['contentFrame']}),
        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
        #dict(name='table', attrs={'align':'RIGHT'}),
    ]


    # TO LOGIN
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www3.interscience.wiley.com/cgi-bin/home')
        br.select_form(name='siteLogin')
        br['LoginName'] = self.username
        br['Password'] = self.password
        response = br.submit()
        raw = response.read()
        if 'userName = ""' in raw:
            raise Exception('Login failed. Check your username and password')
        return br

    # TO GET ARTICLE TOC
    def johm_get_index(self):
        return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')

    # To parse article toc
    def parse_index(self):
        parse_soup = self.johm_get_index()

        div = parse_soup.find(id='contentCell')

        current_section = None
        current_articles = []
        feeds = []
        for x in div.findAll(True):
            if x.name == 'h4':
                # Section heading found
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(x)
                current_articles = []
                self.log('\tFound section:', current_section)
            if current_section is not None and x.name == 'strong':
                title = self.tag_to_string(x)
                p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
                if p is None:
                    continue
                url = p.get('href', False)
                if not url or not title:
                    continue
                if url.startswith('/'):
                    url = 'http://www3.interscience.wiley.com'+url
                url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                #if url.startswith('/'):
                    #url = 'http://online.wsj.com'+url
                current_articles.append({'title': title, 'url':url,
                    'description':'', 'date':''})

        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds
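
For reference, parse_index hands back a list of (section title, article list) pairs; a minimal illustration of the expected shape (all values made up, not taken from the journal site):

    feeds = [
        ('Original Articles', [
            {'title': 'Some article', 'url': 'http://example.com/a',
             'description': '', 'date': ''},
        ]),
    ]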

    def preprocess_html(self, soup):
        for img in soup.findAll('img', src=True):
            img['src'] = img['src'].replace('tfig', 'nfig')
        return soup
37  resources/recipes/kukuburi.recipe  Normal file
@@ -0,0 +1,37 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__author__ = 'Mori'
__version__ = 'v. 0.1'
'''
Kukuburi.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class KukuburiRecipe(BasicNewsRecipe):
    __author__ = 'Mori'
    language = 'en'

    title = u'Kukuburi'
    publisher = u'Ramón Pérez'
    description = u'KUKUBURI by Ram\xf3n P\xe9rez'

    no_stylesheets = True
    remove_javascript = True

    oldest_article = 100
    max_articles_per_feed = 100

    feeds = [
        (u'Kukuburi', u'http://feeds2.feedburner.com/Kukuburi')
    ]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<!--.*?-->', lambda match: ''),
            (r'<div class="feedflare".*?</div>', lambda match: '')
        ]
    ]
@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lasegunda.com
'''
@@ -19,43 +17,38 @@ class LaSegunda(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_javascript = True
    masthead_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_empty_feeds = True
    language = 'es'
    extra_css = ' .titulonegritastop{font-size: xx-large; font-weight: bold} '

    conversion_options = {
        'comment'          : description
        , 'tags'             : category
        , 'publisher'        : publisher
        , 'language'         : language
        , 'linearize_tables' : True
    }

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
        , '--ignore-tables'
    ]
    remove_tags_before = dict(attrs={'class':'titulonegritastop'})
    remove_tags = [dict(name='img')]
    remove_attributes = ['width','height']

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='table')]

    feeds = [
        (u'Noticias de ultima hora', u'http://www.lasegunda.com/rss20/index.asp?canal=0')
        ,(u'Politica', u'http://www.lasegunda.com/rss20/index.asp?canal=21')
        ,(u'Cronica', u'http://www.lasegunda.com/rss20/index.asp?canal=20')
        ,(u'Internacional', u'http://www.lasegunda.com/rss20/index.asp?canal=23')
        ,(u'Deportes', u'http://www.lasegunda.com/rss20/index.asp?canal=24')
        ,(u'Epectaculos/Cultura', u'http://www.lasegunda.com/rss20/index.asp?canal=25')
        ,(u'Educacion', u'http://www.lasegunda.com/rss20/index.asp?canal=26')
        ,(u'Ciencia y Tecnologia', u'http://www.lasegunda.com/rss20/index.asp?canal=27')
        ,(u'Solidaridad', u'http://www.lasegunda.com/rss20/index.asp?canal=28')
        ,(u'Buena Vida', u'http://www.lasegunda.com/rss20/index.asp?canal=32')
        ,(u'Politica' , u'http://www.lasegunda.com/rss20/index.asp?canal=21')
        ,(u'Cronica' , u'http://www.lasegunda.com/rss20/index.asp?canal=20')
        ,(u'Internacional' , u'http://www.lasegunda.com/rss20/index.asp?canal=23')
        ,(u'Deportes' , u'http://www.lasegunda.com/rss20/index.asp?canal=24')
        ,(u'Epectaculos/Cultura' , u'http://www.lasegunda.com/rss20/index.asp?canal=25')
        ,(u'Educacion' , u'http://www.lasegunda.com/rss20/index.asp?canal=26')
        ,(u'Ciencia y Tecnologia' , u'http://www.lasegunda.com/rss20/index.asp?canal=27')
        ,(u'Solidaridad' , u'http://www.lasegunda.com/rss20/index.asp?canal=28')
        ,(u'Buena Vida' , u'http://www.lasegunda.com/rss20/index.asp?canal=32')
    ]

    def print_version(self, url):
        rest, sep, article_id = url.partition('index.asp?idnoticia=')
        return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
latercera.com
'''
@@ -18,32 +16,32 @@ class LaTercera(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    remove_javascript = True
    use_embedded_content = False
    remove_empty_feeds = True
    language = 'es'

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    conversion_options = {
        'comment'          : description
        , 'tags'             : category
        , 'publisher'        : publisher
        , 'language'         : language
        , 'linearize_tables' : True
    }

    keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]

    remove_tags = [
        dict(name='script')
        ,dict(name='ul')
        dict(name=['ul','input','base'])
        ,dict(name='div', attrs={'id':['boxComentarios','shim','enviarAmigo']})
        ,dict(name='div', attrs={'class':['ad640','span-10 imgSet A','infoRelCol']})
        ,dict(name='input')
        ,dict(name='p', attrs={'id':['mensajeError','mensajeEnviandoNoticia','mensajeExito']})
    ]


    feeds = [
        (u'Noticias de ultima hora', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1')
        ,(u'Pais', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=654')
        ,(u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680')
        ,(u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674')
        ,(u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678')
        ,(u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656')
        ,(u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655')
@@ -55,10 +53,6 @@ class LaTercera(BasicNewsRecipe):
    ]

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    language = 'es'

@@ -1,11 +1,11 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
'''
newscientist.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class NewScientist(BasicNewsRecipe):
@@ -15,12 +15,14 @@ class NewScientist(BasicNewsRecipe):
    language = 'en'
    publisher = 'New Scientist'
    category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
    delay = 3
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    cover_url = 'http://www.newscientist.com/currentcover.jpg'
    masthead_url = 'http://www.newscientist.com/img/misc/ns_logo.jpg'
    encoding = 'utf-8'
    extra_css = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} '

    conversion_options = {
        'comment' : description
@@ -28,14 +30,18 @@ class NewScientist(BasicNewsRecipe):
        , 'publisher' : publisher
        , 'language' : language
    }
    preprocess_regexps = [(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')]

    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol']})]
    keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','nsblgposts','hldgalcols']})]

    remove_tags = [
        dict(name='div', attrs={'class':['hldBd','adline','pnl','infotext' ]})
        ,dict(name='div', attrs={'id' :['compnl','artIssueInfo','artTools']})
        dict(name='div' , attrs={'class':['hldBd','adline','pnl','infotext' ]})
        ,dict(name='div' , attrs={'id' :['compnl','artIssueInfo','artTools']})
        ,dict(name='p' , attrs={'class':['marker','infotext' ]})
        ,dict(name='meta' , attrs={'name' :'description' })
    ]
    remove_tags_after = dict(attrs={'class':'nbpcopy'})
    remove_attributes = ['height','width']

    feeds = [
        (u'Latest Headlines' , u'http://feeds.newscientist.com/science-news' )
@@ -50,9 +56,15 @@ class NewScientist(BasicNewsRecipe):
    ]

    def get_article_url(self, article):
        url = article.get('guid', None)
        return url
        return article.get('guid', None)

    def print_version(self, url):
        return url + '?full=true&print=true'

    def preprocess_html(self, soup):
        for tg in soup.findAll('a'):
            if tg.string == 'Home':
                tg.parent.extract()
        return self.adeify_images(soup)
        return self.adeify_images(soup)

@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
nspm.rs
'''
@@ -22,16 +20,15 @@ class Nspm(BasicNewsRecipe):
    INDEX = 'http://www.nspm.rs/?alphabet=l'
    encoding = 'utf-8'
    language = 'sr'

    lang = 'sr-Latn-RS'
    masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : lang
        , 'pretty_print' : True
        , 'language' : language
        , 'linearize_tables' : True
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
@@ -39,6 +36,8 @@ class Nspm(BasicNewsRecipe):
        dict(name=['link','object','embed'])
        ,dict(name='td', attrs={'class':'buttonheading'})
    ]
    remove_tags_after = dict(attrs={'class':'article_separator'})
    remove_attributes = ['width','height']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@@ -51,17 +50,6 @@ class Nspm(BasicNewsRecipe):
        return url.replace('.html','/stampa.html')

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        attribs = [ 'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        for item in soup.body.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
@@ -7,10 +7,11 @@ sfgate.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class SanFranciscoChronicle(BasicNewsRecipe):
    title = u'San Francisco Chronicle'
    __author__ = u'Darko Miletic'
    __author__ = u'Darko Miletic and Sujata Raman'
    description = u'San Francisco news'
    language = 'en'

@@ -19,13 +20,56 @@ class SanFranciscoChronicle(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False

    remove_tags_before = {'class':'articleheadings'}
    remove_tags_after = dict(name='div', attrs={'id':'articlecontent' })


    remove_tags_before = {'id':'printheader'}

    remove_tags = [
        dict(name='div', attrs={'class':'tools tools_top'})
        ,dict(name='div', attrs={'id':'articlebox' })
        dict(name='div', attrs={'id':'printheader'})
        ,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
        ,dict(name='div', attrs={'id':'footer'})
    ]

    extra_css = '''
        h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
        h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
        h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
        h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
        p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
        .byline{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .date{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
        .dtlcomment{font-style:italic;}
        .georgia h3{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#000000;}
        '''

    feeds = [
        (u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
    ]

    def print_version(self, url):
        url = url + "&type=printable"
        return url

    def get_article_url(self, article):
        print str(article['title_detail']['value'])
        url = article.get('guid', None)
        if url:
            url = "http://www.sfgate.com/cgi-bin/article.cgi?f=" + url
        if "Presented By:" in str(article['title_detail']['value']):
            url = ''
        return url

42  resources/recipes/sfbg.recipe  Normal file
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class SanFranciscoBayGuardian(BasicNewsRecipe):
    title = u'San Francisco Bay Guardian'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 # days
    max_articles_per_feed = 25
    #encoding = 'latin1'

    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
    remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
    remove_tags = [
        dict(name='iframe'),
        #dict(name='div', attrs={'class':'related-articles'}),
        dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
        dict(name='ul', attrs={'id':'story_tabs'}),
    ]


    feeds = [
        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
        ('News', 'http://www.newsobserver.com/102/index.rss'),
        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
        ('Business', 'http://www.newsobserver.com/104/index.rss'),
        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]


    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'story_body'})
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
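
The preprocess_html above rebuilds a bare page around the story div, a common recipe trick for discarding everything but the article. A minimal standalone sketch of the same idea (the shell markup and div id are illustrative, not the recipe's exact values):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    def keep_only(soup, div_id):
        # Pull the wanted container out of the fetched page...
        story = soup.find(name='div', attrs={'id': div_id})
        # ...and graft it into an otherwise empty document.
        shell = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        shell.find(name='body').insert(0, story)
        return shell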
52  resources/recipes/smith.recipe  Normal file
@@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class SmithsonianMagazine(BasicNewsRecipe):
    title = u'Smithsonian Magazine'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 31 # days
    max_articles_per_feed = 50
    #encoding = 'latin1'
    recursions = 1
    match_regexps = ['&page=[2-9]$']

    remove_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after = dict(name='p', attrs={'id':'articlePaginationWrapper'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':'article_sidebar_border'}),
        dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
        dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}),
    ]


    feeds = [
        ('History and Archeology',
         'http://feeds.feedburner.com/smithsonianmag/history-archaeology'),
        ('People and Places',
         'http://feeds.feedburner.com/smithsonianmag/people-places'),
        ('Science and Nature',
         'http://feeds.feedburner.com/smithsonianmag/science-nature'),
        ('Arts and Culture',
         'http://feeds.feedburner.com/smithsonianmag/arts-culture'),
        ('Travel',
         'http://feeds.feedburner.com/smithsonianmag/travel'),
    ]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'article-left'})
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup

    def postprocess_html(self, soup, first):
        for p in soup.findAll(id='articlePaginationWrapper'):
            p.extract()
        if not first:
            for div in soup.findAll(id='article-head'):
                div.extract()
        return soup
@@ -9,16 +9,35 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
    title = u'Strategy+Business'
    publisher = u' Booz & Company'
    category = u'Business'
    description = u'Business magazine for senior business executives and the people who influence them.'
    description = (u'Business magazine for senior business executives and the people who influence them. '
            'Go to http://www.strategy-business.com/registration to sign up for a free account')

    oldest_article = 13 * 7 # 3 months
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    needs_subscription = True

    no_stylesheets = True
    remove_javascript = True

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.strategy-business.com/registration')
        for i, f in enumerate(br.forms()):
            if 'gatekeeper_edit' in f.name:
                br.select_form(name=f.name)
                for c in f.controls:
                    if c.name.endswith('_email'):
                        br[c.name] = self.username
                    elif c.name.endswith('_password'):
                        br[c.name] = self.password
        raw = br.submit().read()
        if '>Logout' not in raw:
            raise ValueError('Failed to login, check your username and password')
        return br


    extra_css = '''
        body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
        a {text-decoration: none; color: blue;}

@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class al(BasicNewsRecipe):
    author = 'Lorenzo Vigentini'
    description = 'the Escapist Magazine'
    description = 'The Escapist Magazine'

    cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
    title = u'the Escapist Magazine'

39  resources/recipes/thecultofghoul.recipe  Normal file
@@ -0,0 +1,39 @@

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
cultofghoul.blogspot.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class TheCultOfGhoul(BasicNewsRecipe):
    title = 'The Cult of Ghoul'
    __author__ = 'Darko Miletic'
    description = 'Filmski blog'
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'sr'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px } '

    conversion_options = {
        'comment' : description
        , 'tags' : 'film, blog, srbija, strava, uzas'
        , 'publisher': 'Dejan Ognjanovic'
        , 'language' : language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Posts', u'http://cultofghoul.blogspot.com/feeds/posts/default')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

@@ -50,7 +50,11 @@ class WallStreetJournal(BasicNewsRecipe):
        br.select_form(nr=0)
        br['user'] = self.username
        br['password'] = self.password
        br.submit()
        res = br.submit()
        raw = res.read()
        if 'Welcome,' not in raw:
            raise ValueError('Failed to log in to wsj.com, check your '
                    'username and password')
        return br
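
Both login helpers above follow the same sentinel-string pattern: submit the form, read the response, and fail fast if a marker known to appear only on a logged-in page is missing. A generic sketch of the pattern (LOGIN_URL and LOGGED_IN_MARKER are illustrative placeholders; only get_browser is calibre API):

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open(LOGIN_URL)                  # hypothetical login page
        br.select_form(nr=0)
        br['user'] = self.username
        br['password'] = self.password
        raw = br.submit().read()
        if LOGGED_IN_MARKER not in raw:     # e.g. 'Welcome,' or '>Logout'
            raise ValueError('Failed to log in, check your username and password')
        return br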

    def postprocess_html(self, soup, first):
@@ -69,8 +73,10 @@ class WallStreetJournal(BasicNewsRecipe):
        soup = self.wsj_get_index()

        year = strftime('%Y')
        for x in soup.findAll('td', attrs={'class':'b14'}):
        for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
            txt = self.tag_to_string(x).strip()
            txt = txt.replace(u'\xa0', ' ')
            txt = txt.encode('ascii', 'ignore')
            if year in txt:
                self.timefmt = ' [%s]'%txt
                break
@@ -11,7 +11,8 @@ import sys, re, os, platform

is64bit = platform.architecture()[0] == '64bit'
iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform
islinux = not isosx and not iswindows
isfreebsd = 'freebsd' in sys.platform
islinux = not isosx and not iswindows and not isfreebsd
SRC = os.path.abspath('src')
sys.path.insert(0, SRC)
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
@@ -117,7 +118,7 @@ class Command(object):
        self.real_user = os.environ.get('SUDO_USER', None)

    def drop_privileges(self):
        if not islinux or isosx:
        if not islinux or isosx or isfreebsd:
            return
        if self.real_user is not None:
            self.info('Dropping privileges to those of', self.real_user+':',
@@ -128,7 +129,7 @@ class Command(object):
            os.seteuid(int(self.real_uid))

    def regain_privileges(self):
        if not islinux or isosx:
        if not islinux or isosx or isfreebsd:
            return
        if os.geteuid() != 0 and self.orig_euid == 0:
            self.info('Trying to get root privileges')

@@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
fc_lib = '/usr/lib'
podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib'
chmlib_inc_dirs = chmlib_lib_dirs = []

if iswindows:
    prefix = r'C:\cygwin\home\kovid\sw'
@@ -96,6 +97,10 @@ if iswindows:
    sw_lib_dir = os.path.join(prefix, 'lib')
    fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
    fc_lib = sw_lib_dir
    chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
            'build', 'chmlib-0.40', 'src'))
    chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
            'build', 'chmlib-0.40', 'src', 'Release'))
    png_inc_dirs = [sw_inc_dir]
    png_lib_dirs = [sw_lib_dir]
    png_libs = ['png12']

@@ -11,15 +11,16 @@ from distutils import sysconfig

from PyQt4.pyqtconfig import QtGuiModuleMakefile

from setup import Command, islinux, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, \
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
    fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
    podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
    QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
    magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
    magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
    magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
    jpg_lib_dirs, chmlib_lib_dirs
MT
isunix = islinux or isosx
isunix = islinux or isosx or isfreebsd

make = 'make' if isunix else NMAKE

@@ -56,6 +57,22 @@ if iswindows:
    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']

extensions = [

    Extension('chmlib',
        ['calibre/utils/chm/swig_chm.c'],
        libraries=['ChmLib' if iswindows else 'chm'],
        inc_dirs=chmlib_inc_dirs,
        lib_dirs=chmlib_lib_dirs,
        cflags=["-DSWIG_COBJECT_TYPES"]),

    Extension('chm_extra',
        ['calibre/utils/chm/extra.c'],
        libraries=['ChmLib' if iswindows else 'chm'],
        inc_dirs=chmlib_inc_dirs,
        lib_dirs=chmlib_lib_dirs,
        cflags=["-D__PYTHON__"]),


    Extension('pdfreflow',
        reflow_sources,
        headers=reflow_headers,
@@ -126,7 +143,7 @@ extensions = [
if iswindows:
    extensions.append(Extension('winutil',
        ['calibre/utils/windows/winutil.c'],
        libraries=['shell32', 'setupapi'],
        libraries=['shell32', 'setupapi', 'wininet'],
        cflags=['/X']
        ))

@@ -154,6 +171,13 @@ if islinux:
    ldflags.append('-lpython'+sysconfig.get_python_version())


if isfreebsd:
    cflags.append('-pthread')
    ldflags.append('-shared')
    cflags.append('-I'+sysconfig.get_python_inc())
    ldflags.append('-lpython'+sysconfig.get_python_version())


if isosx:
    x, p = ('i386', 'ppc')
    archs = ['-arch', x, '-arch', p, '-isysroot',
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex

from setup import Command, islinux, basenames, modules, functions, \
from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
    __appname__, __version__

HEADER = '''\
@@ -116,7 +116,7 @@ class Develop(Command):


    def pre_sub_commands(self, opts):
        if not islinux:
        if not (islinux or isfreebsd):
            self.info('\nSetting up a source based development environment is only '
                    'supported on linux. On other platforms, see the User Manual'
                    ' for help with setting up a development environment.')
@@ -156,7 +156,7 @@ class Develop(Command):
            self.warn('Failed to compile mount helper. Auto mounting of',
                    ' devices will not work')

        if os.geteuid() != 0:
        if not isfreebsd and os.geteuid() != 0:
            return self.warn('Must be run as root to compile mount helper. Auto '
                    'mounting of devices will not work.')
        src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
@@ -168,6 +168,7 @@ class Develop(Command):
        ret = p.wait()
        if ret != 0:
            return warn()
        if not isfreebsd:
            os.chown(dest, 0, 0)
        os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
                stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)

@@ -42,6 +42,7 @@ class LinuxFreeze(Command):
    '/usr/lib/liblcms.so.1',
    '/tmp/calibre-mount-helper',
    '/usr/lib/libunrar.so',
    '/usr/lib/libchm.so.0',
    '/usr/lib/libsqlite3.so.0',
    '/usr/lib/libsqlite3.so.0',
    '/usr/lib/libmng.so.1',

@@ -459,7 +459,7 @@ class Py2App(object):

    @flush
    def add_misc_libraries(self):
        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
            info('\nAdding', x)
            x = 'lib%s.dylib'%x
            shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)

@@ -12,7 +12,7 @@ warnings.simplefilter('ignore', DeprecationWarning)


from calibre.startup import plugins, winutil, winutilerror
from calibre.constants import iswindows, isosx, islinux, isfrozen, \
from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
    terminal_controller, preferred_encoding, \
    __appname__, __version__, __author__, \
    win32event, win32api, winerror, fcntl, \
@@ -22,7 +22,7 @@ import mechanize

if False:
    winutil, winutilerror, __appname__, islinux, __version__
    fcntl, win32event, isfrozen, __author__, terminal_controller
    winerror, win32api
    winerror, win32api, isfreebsd

mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.42'
__version__ = '0.6.44'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@@ -22,7 +22,8 @@ terminal_controller = TerminalController(sys.stdout)
iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
isosx = 'darwin' in sys.platform.lower()
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
islinux = not(iswindows or isosx)
isfreebsd = 'freebsd' in sys.platform.lower()
islinux = not(iswindows or isosx or isfreebsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux

@@ -56,7 +57,8 @@ if plugins is None:
    sys.path.insert(0, plugin_path)

    for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
            'fontconfig', 'pdfreflow', 'progress_indicator'] + \
            'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
            'chm_extra'] + \
            (['winutil'] if iswindows else []) + \
            (['usbobserver'] if isosx else []):
        try:

@@ -119,11 +119,34 @@ class Plugin(object):

    def __enter__(self, *args):
        if self.plugin_path is not None:
            from calibre.utils.zipfile import ZipFile
            zf = ZipFile(self.plugin_path)
            extensions = set([x.rpartition('.')[-1].lower() for x in
                zf.namelist()])
            zip_safe = True
            for ext in ('pyd', 'so', 'dll', 'dylib'):
                if ext in extensions:
                    zip_safe = False
            if zip_safe:
                sys.path.insert(0, self.plugin_path)
                self.sys_insertion_path = self.plugin_path
            else:
                from calibre.ptempfile import TemporaryDirectory
                self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
                self.sys_insertion_path = self._sys_insertion_tdir.__enter__(*args)
                zf.extractall(self.sys_insertion_path)
                sys.path.insert(0, self.sys_insertion_path)
            zf.close()


    def __exit__(self, *args):
        if self.plugin_path in sys.path:
            sys.path.remove(self.plugin_path)
        ip, it = getattr(self, 'sys_insertion_path', None), getattr(self,
                '_sys_insertion_tdir', None)
        if ip in sys.path:
            sys.path.remove(ip)
        if hasattr(it, '__exit__'):
            it.__exit__(*args)
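
The changelog entry about plugin zip files with binary code corresponds to the __enter__ logic above: Python can import pure-Python modules straight from a zip on sys.path, but compiled extensions have to live on disk. A condensed standalone sketch of the same decision, using only the stdlib (tempfile.mkdtemp stands in for calibre's TemporaryDirectory context manager):

    import sys, tempfile, zipfile

    def add_plugin_to_path(plugin_zip):
        zf = zipfile.ZipFile(plugin_zip)
        exts = set(name.rpartition('.')[-1].lower() for name in zf.namelist())
        if exts.intersection(['pyd', 'so', 'dll', 'dylib']):
            # Compiled extensions cannot be imported from inside a zip:
            # unpack to a real directory and put that on sys.path instead.
            tdir = tempfile.mkdtemp('plugin_unzip')
            zf.extractall(tdir)
            sys.path.insert(0, tdir)
        else:
            # Pure Python: zipimport can load modules straight from the zip.
            sys.path.insert(0, plugin_zip)
        zf.close()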


class FileTypePlugin(Plugin):

@@ -70,9 +70,10 @@ class PML2PMLZ(FileTypePlugin):
        pmlz = zipfile.ZipFile(of.name, 'w')
        pmlz.write(pmlfile, os.path.basename(pmlfile))

        pml_img = os.path.basename(pmlfile)[0] + '_img'
        img_dir = pml_img if os.path.exists(pml_img) else 'images' if \
            os.path.exists('images') else ''
        pml_img = os.path.splitext(pmlfile)[0] + '_img'
        i_img = os.path.join(os.path.dirname(pmlfile),'images')
        img_dir = pml_img if os.path.isdir(pml_img) else i_img if \
            os.path.isdir(i_img) else ''
        if img_dir:
            for image in glob.glob(os.path.join(img_dir, '*.png')):
                pmlz.write(image, os.path.join('images', (os.path.basename(image))))
@@ -81,17 +82,6 @@ class PML2PMLZ(FileTypePlugin):
        return of.name


# CHM MODIFIED
class CHMMetadataReader(MetadataReaderPlugin):

    name = 'Read CHM metadata'
    file_types = set(['chm'])
    description = _('Read metadata from %s files') % 'CHM'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.chm import get_metadata
        return get_metadata(stream)

class ComicMetadataReader(MetadataReaderPlugin):

    name = 'Read comic metadata'
@@ -113,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
            mi.cover_data = (ext.lower(), data)
        return mi

class CHMMetadataReader(MetadataReaderPlugin):

    name = 'Read CHM metadata'
    file_types = set(['chm'])
    description = _('Read metadata from %s files') % 'CHM'

    def get_metadata(self, stream, ftype):
        from calibre.ebooks.chm.metadata import get_metadata
        return get_metadata(stream)


class EPUBMetadataReader(MetadataReaderPlugin):

    name = 'Read EPUB metadata'
@@ -394,7 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput # CHM MODIFIED
from calibre.ebooks.chm.input import CHMInput

from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output
@@ -418,7 +419,7 @@ from calibre.devices.blackberry.driver import BLACKBERRY
from calibre.devices.cybook.driver import CYBOOK
from calibre.devices.eb600.driver import EB600, COOL_ER, SHINEBOOK, \
    POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK, \
    BOOQ
    BOOQ, ELONEX
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
@@ -433,6 +434,7 @@ from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.devices.binatone.driver import README
from calibre.devices.hanvon.driver import N516, EB511
from calibre.devices.teclast.driver import TECLAST_K3

from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
from calibre.library.catalog import CSV_XML, EPUB_MOBI
@@ -454,7 +456,7 @@ plugins += [
    TCRInput,
    TXTInput,
    LRFInput,
    CHMInput, # CHM MODIFIED
    CHMInput,
]
plugins += [
    EPUBOutput,
@@ -508,6 +510,8 @@ plugins += [
    README,
    N516,
    EB511,
    ELONEX,
    TECLAST_K3
]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
    x.__name__.endswith('MetadataReader')]
@@ -214,8 +214,21 @@ class InputFormatPlugin(Plugin):
        return ret

    def postprocess_book(self, oeb, opts, log):
        '''
        Called to allow the input plugin to perform postprocessing after
        the book has been parsed.
        '''
        pass

    def specialize(self, oeb, opts, log, output_fmt):
        '''
        Called to allow the input plugin to specialize the parsed book
        for a particular output format. Called after postprocess_book
        and before any transforms are performed on the parsed book.
        '''
        pass


class OutputFormatPlugin(Plugin):
    '''
    OutputFormatPlugins are responsible for converting an OEB document

@@ -235,7 +235,7 @@ class SonyReaderOutput(OutputProfile):
    description = _('This profile is intended for the SONY PRS line. '
            'The 500/505/600/700 etc.')

    screen_size = (600, 775)
    screen_size = (590, 775)
    dpi = 168.451
    fbase = 12
    fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]

@@ -235,6 +235,8 @@ def _run_filetype_plugins(path_to_file, ft=None, occasion='preprocess'):
    with plugin:
        try:
            nfp = plugin.run(path_to_file)
            if not nfp:
                nfp = path_to_file
        except:
            print 'Running file type plugin %s failed with traceback:'%plugin.name
            traceback.print_exc()
@@ -399,7 +401,7 @@ def initialize_plugins():
            plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
        except PluginNotFound:
            continue
        plugin = initialize_plugin(plugin, zfp if not isinstance(zfp, type) else zfp)
        plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
        _initialized_plugins.append(plugin)
    except:
        print 'Failed to initialize plugin...'

@@ -23,6 +23,8 @@ Run an embedded python interpreter.
            help='Debug the specified device driver.')
    parser.add_option('-g', '--gui', default=False, action='store_true',
            help='Run the GUI',)
    parser.add_option('-w', '--viewer', default=False, action='store_true',
            help='Run the ebook viewer',)
    parser.add_option('--paths', default=False, action='store_true',
            help='Output the paths necessary to setup the calibre environment')
    parser.add_option('--migrate', action='store_true', default=False,
@@ -98,6 +100,12 @@ def main(args=sys.argv):
    if opts.gui:
        from calibre.gui2.main import main
        main(['calibre'])
    elif opts.viewer:
        from calibre.gui2.viewer.main import main
        vargs = ['ebook-viewer', '--debug-javascript']
        if len(args) > 1:
            vargs.append(args[-1])
        main(vargs)
    elif opts.command:
        sys.argv = args[:1]
        exec opts.command

@@ -60,8 +60,10 @@ def debug(ioreg_to_tmp=False, buf=None):
    if isosx:
        from calibre.devices.usbms.device import Device
        mount = repr(Device.osx_run_mount())
        ioreg = Device.run_ioreg()
        ioreg = 'Output from mount:\n\n'+mount+'\n\n'+ioreg
        drives = pprint.pformat(Device.osx_get_usb_drives())
        ioreg = 'Output from mount:\n'+mount+'\n\n'
        ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
        ioreg += Device.run_ioreg()
    connected_devices = []
    for dev in device_plugins():
        out('Looking for', dev.__class__.__name__)

@@ -15,7 +15,7 @@ class ANDROID(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
    FORMATS = ['epub']
    FORMATS = ['epub', 'pdf']

    VENDOR_ID = {
        0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},

@@ -195,3 +195,15 @@ class BOOQ(EB600):
    WINDOWS_MAIN_MEM = 'EB600'
    WINDOWS_CARD_A_MEM = 'EB600'

class ELONEX(EB600):

    name = 'Elonex 600EB'
    gui_name = 'Elonex'

    FORMATS = ['epub', 'pdf', 'txt', 'html']

    VENDOR_NAME = 'ELONEX'
    WINDOWS_MAIN_MEM = 'EBOOK'
    WINDOWS_CARD_A_MEM = 'EBOOK'


@@ -6,6 +6,7 @@ the GUI. A device backend must subclass the L{Device} class. See prs500.py for
a backend that implements the Device interface for the SONY PRS500 Reader.
"""
import os
from collections import namedtuple

from calibre.customize import Plugin
from calibre.constants import iswindows
@@ -43,6 +44,9 @@ class DevicePlugin(Plugin):
    #: Icon for this device
    icon = I('reader.svg')

    # Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
    UserAnnotation = namedtuple('Annotation','type, bookmark')

    @classmethod
    def get_gui_name(cls):
        if hasattr(cls, 'gui_name'):
@@ -7,10 +7,9 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Amazon's Kindle
'''

import os
import re
import sys
import os, re, sys
from cStringIO import StringIO
from struct import unpack

from calibre.devices.usbms.driver import USBMS

@@ -44,6 +43,7 @@ class KINDLE(USBMS):
    EBOOK_DIR_CARD_A = 'documents'
    DELETE_EXTS = ['.mbp']
    SUPPORTS_SUB_DIRS = True
    SUPPORTS_ANNOTATIONS = True

    WIRELESS_FILE_NAME_PATTERN = re.compile(
        r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')
@@ -60,6 +60,73 @@ class KINDLE(USBMS):
            'replace')
        return mi

    def get_annotations(self, path_map):
        MBP_FORMATS = [u'azw', u'mobi', u'prc', u'txt']
        TAN_FORMATS = [u'tpz', u'azw1']

        mbp_formats = set()
        for fmt in MBP_FORMATS:
            mbp_formats.add(fmt)
        tan_formats = set()
        for fmt in TAN_FORMATS:
            tan_formats.add(fmt)

        def get_storage():
            storage = []
            if self._main_prefix:
                storage.append(os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN))
            if self._card_a_prefix:
                storage.append(os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A))
            if self._card_b_prefix:
                storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
            return storage

        def resolve_bookmark_paths(storage, path_map):
            pop_list = []
            book_ext = {}
            for id in path_map:
                file_fmts = set()
                for fmt in path_map[id]['fmts']:
                    file_fmts.add(fmt)

                bookmark_extension = None
                if file_fmts.intersection(mbp_formats):
                    book_extension = list(file_fmts.intersection(mbp_formats))[0]
                    bookmark_extension = 'mbp'
                elif file_fmts.intersection(tan_formats):
                    book_extension = list(file_fmts.intersection(tan_formats))[0]
                    bookmark_extension = 'tan'

                if bookmark_extension:
                    for vol in storage:
                        bkmk_path = path_map[id]['path'].replace(os.path.abspath('/<storage>'),vol)
                        bkmk_path = bkmk_path.replace('bookmark',bookmark_extension)
                        if os.path.exists(bkmk_path):
                            path_map[id] = bkmk_path
                            book_ext[id] = book_extension
                            break
                    else:
                        pop_list.append(id)
                else:
                    pop_list.append(id)

            # Remove non-existent bookmark templates
            for id in pop_list:
                path_map.pop(id)
            return path_map, book_ext

        storage = get_storage()
        path_map, book_ext = resolve_bookmark_paths(storage, path_map)

        bookmarked_books = {}
        for id in path_map:
            bookmark_ext = path_map[id].rpartition('.')[2]
            myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
            bookmarked_books[id] = self.UserAnnotation(type='kindle', bookmark=myBookmark)

        # This returns as job.result in gui2.ui.annotations_fetched(self, job)
        return bookmarked_books
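
For orientation, the mapping get_annotations builds is keyed by calibre book id and valued with the UserAnnotation namedtuple declared on DevicePlugin above. A hedged sketch of how a consumer might walk it (the real consumer is annotations_fetched in gui2.ui; variable names here are illustrative):

    # dev: a connected KINDLE instance; path_map as described above.
    annotations = dev.get_annotations(path_map)
    for book_id, annot in annotations.items():
        bm = annot.bookmark          # a Bookmark, per the namedtuple fields
        print 'book %s: read %d%%, last location %d' % (
                book_id, bm.percent_read, bm.last_read_location)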


class KINDLE2(KINDLE):

@@ -79,3 +146,213 @@ class KINDLE_DX(KINDLE2):

    PRODUCT_ID = [0x0003]
    BCD = [0x0100]

class Bookmark():
    '''
    A simple class fetching bookmark data
    Kindle-specific
    '''
    def __init__(self, path, id, book_format, bookmark_extension):
        self.book_format = book_format
        self.bookmark_extension = bookmark_extension
        self.book_length = 0
        self.id = id
        self.last_read = 0
        self.last_read_location = 0
        self.timestamp = 0
        self.user_notes = None

        self.get_bookmark_data(path)
        self.get_book_length(path)
        try:
            self.percent_read = float(100*self.last_read / self.book_length)
        except:
            self.percent_read = 0

    def record(self, n):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        if n >= self.nrecs:
            raise ValueError('non-existent record %r' % n)
        offoff = 78 + (8 * n)
        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
        stop = None
        if n < (self.nrecs - 1):
            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
        return StreamSlicer(self.stream, start, stop)
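
The offsets in record() follow the PalmDB layout: a 78-byte fixed header, then one 8-byte record-info entry per record (a 4-byte big-endian file offset plus 4 bytes of attributes/unique id), so record n's offset lives at byte 78 + 8*n. A quick standalone check of that arithmetic (the sample bytes are fabricated):

    from struct import pack, unpack

    # Fabricated record-info area: record 0 starts at 0x200, record 1 at 0x380.
    header = '\x00' * 78 + pack('>II', 0x200, 0) + pack('>II', 0x380, 0)
    n = 1
    offoff = 78 + 8 * n
    start, = unpack('>I', header[offoff:offoff + 4])
    assert start == 0x380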

    def get_bookmark_data(self, path):
        ''' Return the timestamp and last_read_location '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        user_notes = {}
        if self.bookmark_extension == 'mbp':
            MAGIC_MOBI_CONSTANT = 150
            with open(path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.timestamp, = unpack('>I', data[0x24:0x28])
                bpar_offset, = unpack('>I', data[0x4e:0x52])
                lrlo = bpar_offset + 0x0c
                self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
                self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
                entries, = unpack('>I', data[0x4a:0x4e])

                # Store the annotations/locations
                bpl = bpar_offset + 4
                bpar_len, = unpack('>I', data[bpl:bpl+4])
                bpar_len += 8
                #print "bpar_len: 0x%x" % bpar_len
                eo = bpar_offset + bpar_len

                # Walk bookmark entries
                #print " --- %s --- " % path
                current_entry = 1
                sig = data[eo:eo+4]
                previous_block = None

                while sig == 'DATA':
                    text = None
                    entry_type = None
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    if rec_len == 0:
                        current_block = "empty_data"
                    elif data[eo+8:eo+12] == "EBAR":
                        current_block = "data_header"
                        #entry_type = "data_header"
                        location, = unpack('>I', data[eo+0x34:eo+0x38])
                        #print "data_header location: %d" % location
                    else:
                        current_block = "text_block"
                        if previous_block == 'empty_data':
                            entry_type = 'Note'
                        elif previous_block == 'data_header':
                            entry_type = 'Highlight'
                        text = data[eo+8:eo+8+rec_len].decode('utf-16-be')

                    if entry_type:
                        displayed_location = location/MAGIC_MOBI_CONSTANT + 1
                        user_notes[location] = dict(id=self.id,
                            displayed_location=displayed_location,
                            type=entry_type,
                            text=text)

                    eo += rec_len + 8
                    current_entry += 1
                    previous_block = current_block
                    sig = data[eo:eo+4]

                while sig == 'BKMK':
                    # Fix start location for Highlights using BKMK data
                    end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
                    if end_loc in user_notes and user_notes[end_loc]['type'] == 'Highlight':
                        start, = unpack('>I', data[eo+8:eo+12])
                        user_notes[start] = user_notes[end_loc]
                        user_notes.pop(end_loc)
                    elif end_loc in user_notes and user_notes[end_loc]['type'] == 'Note':
                        # Skip duplicate bookmarks for notes
                        pass
                    else:
                        # If a bookmark coincides with a user annotation, the locs could
                        # be the same - cheat by nudging -1
                        # Skip bookmark for last_read_location
                        if end_loc != self.last_read:
                            displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
                            user_notes[end_loc - 1] = dict(id=self.id,
                                displayed_location=displayed_location,
                                type='Bookmark',
                                text=None)
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    eo += rec_len + 8
                    sig = data[eo:eo+4]

        elif self.bookmark_extension == 'tan':
            # TAN bookmarks
            MAGIC_TOPAZ_CONSTANT = 33.33
            self.timestamp = os.path.getmtime(path)
            with open(path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                while current_entry < entries:
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = "(Topaz highlights not yet supported)"
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    if self.book_format in ['tpz','azw1']:
                        # *** This needs fine-tuning
                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    elif self.book_format == 'pdf':
                        # *** This needs testing
                        displayed_location = location
                    user_notes[location] = dict(id=self.id,
                        displayed_location=displayed_location,
                        type=e_type,
                        text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                for location in user_notes:
                    if location == self.last_read:
                        user_notes.pop(location)
                        break
        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension
        self.user_notes = user_notes

        '''
        for location in sorted(user_notes):
            print '  Location %d: %s\n%s' % (user_notes[location]['displayed_location'],
                user_notes[location]['type'],
                '\n'.join(self.textdump(user_notes[location]['text'])))
        '''

    def get_book_length(self, path):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        book_fs = path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
|
||||
|
||||
self.book_length = 0
|
||||
if self.bookmark_extension == 'mbp':
|
||||
# Read the book len from the header
|
||||
with open(book_fs,'rb') as f:
|
||||
self.stream = StringIO(f.read())
|
||||
self.data = StreamSlicer(self.stream)
|
||||
self.nrecs, = unpack('>H', self.data[76:78])
|
||||
record0 = self.record(0)
|
||||
self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
|
||||
elif self.bookmark_extension == 'tan':
|
||||
# Read bookLength from metadata
|
||||
with open(book_fs,'rb') as f:
|
||||
stream = StringIO(f.read())
|
||||
raw = stream.read(8*1024)
|
||||
if not raw.startswith('TPZ'):
|
||||
raise ValueError('Not a Topaz file')
|
||||
first = raw.find('metadata')
|
||||
if first < 0:
|
||||
raise ValueError('Invalid Topaz file')
|
||||
second = raw.find('metadata', first+10)
|
||||
if second < 0:
|
||||
raise ValueError('Invalid Topaz file')
|
||||
raw = raw[second:second+1000]
|
||||
idx = raw.find('bookLength')
|
||||
if idx > -1:
|
||||
length = ord(raw[idx+len('bookLength')])
|
||||
self.book_length = int(raw[idx+len('bookLength')+1:idx+len('bookLength')+1+length])
|
||||
|
||||
else:
|
||||
print "unsupported bookmark_extension: %s" % self.bookmark_extension
|
||||
|
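# --- Illustrative sketch (not part of the commit): one plausible way to
# exercise the Bookmark class above against an .mbp sidecar sitting next to
# its book. The path and book id are hypothetical; 'mobi' and 'mbp' mirror
# the book_ext/bookmark_ext pairing that get_annotations() derives via
# rpartition('.').
def describe_annotations(sidecar_path, book_id):
    bm = Bookmark(sidecar_path, book_id, 'mobi', 'mbp')
    print '%.0f%% read, last location %d' % (bm.percent_read, bm.last_read_location)
    for loc in sorted(bm.user_notes):
        note = bm.user_notes[loc]
        print '  %s at %d: %s' % (note['type'], note['displayed_location'], note['text'] or '')
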
@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
    c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
from errno import EBUSY, ENOMEM

from calibre import iswindows, isosx, load_library
from calibre import iswindows, isosx, isfreebsd, load_library

_libusb_name = 'libusb'
PATH_MAX = 511 if iswindows else 1024 if isosx else 4096
PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
if iswindows:
    class Structure(_Structure):
        _pack_ = 1

10  src/calibre/devices/teclast/__init__.py  Normal file
@ -0,0 +1,10 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

42  src/calibre/devices/teclast/driver.py  Normal file
@ -0,0 +1,42 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.devices.usbms.driver import USBMS

class TECLAST_K3(USBMS):

    name = 'Teclast K3 Device Interface'
    gui_name = 'K3'
    description = _('Communicate with the Teclast K3 reader.')
    author = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
    FORMATS = ['epub', 'fb2', 'doc', 'pdf', 'txt']

    VENDOR_ID = [0x071b]
    PRODUCT_ID = [0x3203]
    BCD = [0x0000]

    VENDOR_NAME = 'TECLAST'
    WINDOWS_MAIN_MEM = 'DIGITAL_PLAYER'
    WINDOWS_CARD_A_MEM = 'DIGITAL_PLAYER'

    MAIN_MEMORY_VOLUME_LABEL = 'K3 Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'K3 Storage Card'

    EBOOK_DIR_MAIN = ''
    EBOOK_DIR_CARD_A = ''
    SUPPORTS_SUB_DIRS = True

    def windows_sort_drives(self, drives):
        main = drives.get('main', None)
        card = drives.get('carda', None)
        if card and main and card < main:
            drives['main'] = card
            drives['carda'] = main

        return drives

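# --- Illustrative sketch (not part of the commit): the swap performed by
# windows_sort_drives() above, shown on a plain dict. On Windows the main
# memory and the storage card can enumerate in either order; the driver pins
# the lower drive letter to 'main'.
drives = {'main': 'G:\\', 'carda': 'E:\\'}
if drives['carda'] < drives['main']:
    drives['main'], drives['carda'] = drives['carda'], drives['main']
# drives is now {'main': 'E:\\', 'carda': 'G:\\'}
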
@ -4,8 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import os
import shutil
import os, shutil, time

from calibre.devices.errors import PathError

@ -50,11 +49,12 @@ class CLI(object):
        d = os.path.dirname(path)
        if not os.path.exists(d):
            os.makedirs(d)
        with open(path, 'wb') as dest:
        with open(path, 'w+b') as dest:
            try:
                shutil.copyfileobj(infile, dest)
            except IOError:
                print 'WARNING: First attempt to send file to device failed'
                time.sleep(0.2)
                infile.seek(0)
                dest.seek(0)
                dest.truncate()

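# --- Illustrative sketch (not part of the commit): the retry-once pattern
# used by put_file() above, isolated. Opening 'w+b' instead of 'wb' lets the
# failed first attempt be rewound and truncated. The hunk is cut off before
# the retry itself, so the second copyfileobj() call here is an assumption
# about what follows.
import shutil, time

def copy_with_one_retry(infile, path):
    with open(path, 'w+b') as dest:
        try:
            shutil.copyfileobj(infile, dest)
        except IOError:
            # transient USB write failure: wait, rewind both ends, retry once
            time.sleep(0.2)
            infile.seek(0)
            dest.seek(0)
            dest.truncate()
            shutil.copyfileobj(infile, dest)
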
@ -17,6 +17,7 @@ import time
import re
import sys
import glob

from itertools import repeat

from calibre.devices.interface import DevicePlugin

@ -333,10 +334,14 @@ class Device(DeviceConfig, DevicePlugin):
                raise
            time.sleep(2)

    def _osx_bsd_names(self):
    @classmethod
    def osx_get_usb_drives(cls):
        if usbobserver_err:
            raise RuntimeError('Failed to load usbobserver: '+usbobserver_err)
        drives = usbobserver.get_usb_drives()
        return usbobserver.get_usb_drives()

    def _osx_bsd_names(self):
        drives = self.osx_get_usb_drives()
        matches = []
        d = self.detected_device
        if d.serial:

@ -394,16 +399,6 @@ class Device(DeviceConfig, DevicePlugin):
        if len(matches) > 2:
            drives['cardb'] = matches[2]

        pat = self.OSX_MAIN_MEM_VOL_PAT
        if pat is not None and len(drives) > 1 and 'main' in drives:
            if pat.search(drives['main']) is None:
                main = drives['main']
                for x in ('carda', 'cardb'):
                    if x in drives and pat.search(drives[x]):
                        drives['main'] = drives.pop(x)
                        drives[x] = main
                        break

        return drives

    def osx_bsd_names(self):

@ -427,6 +422,16 @@ class Device(DeviceConfig, DevicePlugin):
        if drives['main'] is None:
            print bsd_drives, mount_map, drives
            raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
        pat = self.OSX_MAIN_MEM_VOL_PAT
        if pat is not None and len(drives) > 1 and 'main' in drives:
            if pat.search(drives['main']) is None:
                main = drives['main']
                for x in ('carda', 'cardb'):
                    if x in drives and pat.search(drives[x]):
                        drives['main'] = drives.pop(x)
                        drives[x] = main
                        break

        self._main_prefix = drives['main']+os.sep
        def get_card_prefix(c):
            ans = drives.get(c, None)

@ -789,7 +794,13 @@ class Device(DeviceConfig, DevicePlugin):
        '''
        return components

    def create_upload_path(self, path, mdata, fname):
    def get_annotations(self, path_map):
        '''
        Resolve path_map to annotation_map of files found on the device
        '''
        return {}

    def create_upload_path(self, path, mdata, fname, create_dirs=True):
        path = os.path.abspath(path)
        extra_components = []

@ -848,7 +859,7 @@ class Device(DeviceConfig, DevicePlugin):
        filedir = os.path.dirname(filepath)

        if not os.path.exists(filedir):
        if create_dirs and not os.path.exists(filedir):
            os.makedirs(filedir)

        return filepath

@ -123,7 +123,7 @@ class USBMS(CLI, Device):
        '''
        :path: the full path were the associated book is located.
        :filename: the name of the book file without the extension.
        :metatdata: metadata belonging to the book. Use metadata.thumbnail
        :metadata: metadata belonging to the book. Use metadata.thumbnail
                   for cover
        '''
        pass

@ -129,3 +129,12 @@ def render_html(path_to_html, width=590, height=750):
    del loop
    return renderer

def check_ebook_format(stream, current_guess):
    ans = current_guess
    if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1'):
        stream.seek(0)
        if stream.read(3) == 'TPZ':
            ans = 'tpz'
        stream.seek(0)
    return ans

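# --- Illustrative sketch (not part of the commit): using check_ebook_format()
# above to re-detect a Topaz file shipped with a MOBI-family extension, as the
# add-books code does for PRC/MOBI uploads. The file name is hypothetical.
with open('book.azw', 'rb') as stream:
    fmt = check_ebook_format(stream, 'azw')
    # fmt == 'tpz' when the file starts with the 'TPZ' magic, else 'azw'
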
@ -53,13 +53,15 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
                     "x-sjis" : "shift-jis" }


def force_encoding(raw, verbose):
def force_encoding(raw, verbose, assume_utf8=False):
    from calibre.constants import preferred_encoding
    try:
        chardet = detect(raw)
    except:
        chardet = {'encoding':preferred_encoding, 'confidence':0}
    encoding = chardet['encoding']
    if chardet['confidence'] < 1 and assume_utf8:
        encoding = 'utf-8'
    if chardet['confidence'] < 1 and verbose:
        print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
    if not encoding:

@ -73,7 +75,7 @@ def force_encoding(raw, verbose):


def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
                   resolve_entities=False):
                   resolve_entities=False, assume_utf8=False):
    '''
    Force conversion of byte string to unicode. Tries to look for XML/HTML
    encoding declaration first, if not found uses the chardet library and

@ -95,7 +97,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
            encoding = match.group(1)
            break
    if encoding is None:
        encoding = force_encoding(raw, verbose)
        encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
    try:
        if encoding.lower().strip() == 'macintosh':
            encoding = 'mac-roman'

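# --- Illustrative sketch (not part of the commit): the effect of the new
# assume_utf8 flag. With no declared encoding and chardet confidence below
# 100%, decoding now prefers UTF-8 over the detector's guess, which is what
# the NCX fix later in this commit relies on. The file name is hypothetical.
raw = open('toc.ncx', 'rb').read()      # an NCX with no encoding declaration
text = xml_to_unicode(raw, assume_utf8=True)[0]
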
@ -1,213 +1,17 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                ' and Alex Bramley <a.bramley at gmail.com>.'

import os, shutil, uuid, re
from tempfile import mkdtemp
from mimetypes import guess_type as guess_mimetype
import os, uuid

from BeautifulSoup import BeautifulSoup, NavigableString
from lxml import html
from pychm.chm import CHMFile
from pychm.chmlib import (
    CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
    chm_enumerate,
)

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename


def match_string(s1, s2_already_lowered):
    if s1 is not None and s2_already_lowered is not None:
        if s1.lower()==s2_already_lowered:
            return True
    return False

def check_all_prev_empty(tag):
    if tag is None:
        return True
    if tag.__class__ == NavigableString and not check_empty(tag):
        return False
    return check_all_prev_empty(tag.previousSibling)

def check_empty(s, rex = re.compile(r'\S')):
    return rex.search(s) is None


def option_parser():
    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
    parser.add_option("-t", "--title", action="store", type="string", \
                      dest="title", help=_("Set the book title"))
    parser.add_option('--title-sort', action='store', type='string', default=None,
                      dest='title_sort', help=_('Set sort key for the title'))
    parser.add_option("-a", "--author", action="store", type="string", \
                      dest="author", help=_("Set the author"))
    parser.add_option('--author-sort', action='store', type='string', default=None,
                      dest='author_sort', help=_('Set sort key for the author'))
    parser.add_option("-c", "--category", action="store", type="string", \
                      dest="category", help=_("The category this book belongs"
                      " to. E.g.: History"))
    parser.add_option("--thumbnail", action="store", type="string", \
                      dest="thumbnail", help=_("Path to a graphic that will be"
                      " set as this files' thumbnail"))
    parser.add_option("--comment", action="store", type="string", \
                      dest="freetext", help=_("Path to a txt file containing a comment."))
    parser.add_option("--get-thumbnail", action="store_true", \
                      dest="get_thumbnail", default=False, \
                      help=_("Extract thumbnail from LRF file"))
    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
    parser.add_option('--classification', default=None, help=_('Set the book classification'))
    parser.add_option('--creator', default=None, help=_('Set the book creator'))
    parser.add_option('--producer', default=None, help=_('Set the book producer'))
    parser.add_option('--get-cover', action='store_true', default=False,
                      help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
    parser.add_option('--bookid', action='store', type='string', default=None,
                      dest='book_id', help=_('Set book ID'))
    parser.add_option('--font-delta', action='store', type='int', default=0,
                      dest='font_delta', help=_('Set font delta'))
    return parser

class CHMError(Exception):
    pass

class CHMReader(CHMFile):
    def __init__(self, input, log):
        CHMFile.__init__(self)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
        self.log = log
        self._sourcechm = input
        self._contents = None
        self._playorder = 0
        self._metadata = False
        self._extracted = False

        # location of '.hhc' file, which is the CHM TOC.
        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
        self.hhc_path = self.root + ".hhc"


    def _parse_toc(self, ul, basedir=os.getcwdu()):
        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
        self._playorder += 1
        for li in ul('li', recursive=False):
            href = li.object('param', {'name': 'Local'})[0]['value']
            if href.count('#'):
                href, frag = href.split('#')
            else:
                frag = None
            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
            #print "========>", name
            toc.add_item(href, frag, name, play_order=self._playorder)
            self._playorder += 1
            if li.ul:
                child = self._parse_toc(li.ul)
                child.parent = toc
                toc.append(child)
        #print toc
        return toc


    def GetFile(self, path):
        # have to have abs paths for ResolveObject, but Contents() deliberately
        # makes them relative. So we don't have to worry, re-add the leading /.
        # note this path refers to the internal CHM structure
        if path[0] != '/':
            path = '/' + path
        res, ui = self.ResolveObject(path)
        if res != CHM_RESOLVE_SUCCESS:
            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
        size, data = self.RetrieveObject(ui)
        if size == 0:
            raise CHMError("'%s' is zero bytes in length!"%(path,))
        return data

    def ExtractFiles(self, output_dir=os.getcwdu()):
        for path in self.Contents():
            lpath = os.path.join(output_dir, path)
            self._ensure_dir(lpath)
            data = self.GetFile(path)
            with open(lpath, 'wb') as f:
                if guess_mimetype(path)[0] == ('text/html'):
                    data = self._reformat(data)
                f.write(data)
        #subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
        self._extracted = True

    def _reformat(self, data):
        try:
            soup = BeautifulSoup(data)
        except UnicodeEncodeError:
            # hit some strange encoding problems...
            print "Unable to parse html for cleaning, leaving it :("
            return data
        # nuke javascript...
        [s.extract() for s in soup('script')]
        # remove forward and back nav bars from the top/bottom of each page
        # cos they really fuck with the flow of things and generally waste space
        # since we can't use [a,b] syntax to select arbitrary items from a list
        # we'll have to do this manually...
        t = soup('table')
        if t:
            if (t[0].previousSibling is None
                    or t[0].previousSibling.previousSibling is None):
                t[0].extract()
            if (t[-1].nextSibling is None
                    or t[-1].nextSibling.nextSibling is None):
                t[-1].extract()
        # for some very odd reason each page's content appears to be in a table
        # too. and this table has sub-tables for random asides... grr.

        # remove br at top of page if present after nav bars removed
        br = soup('br')
        if br:
            if check_all_prev_empty(br[0].previousSibling):
                br[0].extract()

        # some images seem to be broken in some chm's :/
        for img in soup('img'):
            try:
                # some are supposedly "relative"... lies.
                while img['src'].startswith('../'): img['src'] = img['src'][3:]
                # some have ";<junk>" at the end.
                img['src'] = img['src'].split(';')[0]
            except KeyError:
                # and some don't even have a src= ?!
                pass
        # now give back some pretty html.
        return soup.prettify()

    def Contents(self):
        if self._contents is not None:
            return self._contents
        paths = []
        def get_paths(chm, ui, ctx):
            # skip directories
            # note this path refers to the internal CHM structure
            if ui.path[-1] != '/':
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

    def _ensure_dir(self, path):
        dir = os.path.dirname(path)
        if not os.path.isdir(dir):
            os.makedirs(dir)

    def extract_content(self, output_dir=os.getcwdu()):
        self.ExtractFiles(output_dir=output_dir)


class CHMInput(InputFormatPlugin):

    name = 'CHM Input'

@ -215,25 +19,22 @@ class CHMInput(InputFormatPlugin):
    description = 'Convert CHM files to OEB'
    file_types = set(['chm'])

    options = set([
        OptionRecommendation(name='dummy_option', recommended_value=False,
            help=_('dummy option until real options are determined.')),
    ])

    def _chmtohtml(self, output_dir, chm_path, no_images, log):
        from calibre.ebooks.chm.reader import CHMReader
        log.debug('Opening CHM file')
        rdr = CHMReader(chm_path, log)
        log.debug('Extracting CHM to %s' % output_dir)
        rdr.extract_content(output_dir)
        self._chm_reader = rdr
        return rdr.hhc_path


    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.metadata.chm import get_metadata_
        from calibre.ebooks.chm.metadata import get_metadata_from_reader
        from calibre.customize.ui import plugin_for_input_format

        log.debug('Processing CHM...')
        tdir = mkdtemp(prefix='chm2oeb_')
        from calibre.customize.ui import plugin_for_input_format
        with TemporaryDirectory('_chm2oeb') as tdir:
            html_input = plugin_for_input_format('html')
            for opt in html_input.options:
                setattr(options, opt.option.name, opt.recommended_value)

@ -248,8 +49,9 @@ class CHMInput(InputFormatPlugin):
            log.debug('stream.name=%s' % stream.name)
            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
            mainpath = os.path.join(tdir, mainname)
            #raw_input()

            metadata = get_metadata_(tdir)
            metadata = get_metadata_from_reader(self._chm_reader)

            odi = options.debug_pipeline
            options.debug_pipeline = None

@ -260,7 +62,6 @@ class CHMInput(InputFormatPlugin):
            oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
            options.debug_pipeline = odi
            #log.debug('DEBUG: Not removing tempdir %s' % tdir)
        shutil.rmtree(tdir)
        return oeb

    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):

@ -369,6 +170,9 @@ class CHMInput(InputFormatPlugin):
        # check that node is a normal node (not a comment, DOCTYPE, etc.)
        # (normal nodes have string tags)
        if isinstance(node.tag, basestring):
            from calibre.ebooks.chm.reader import match_string

            chapter_path = None
            if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
                for child in node:
                    if match_string(child.tag,'param') and match_string(child.attrib['name'], 'name'):

157  src/calibre/ebooks/chm/metadata.py  Normal file
@ -0,0 +1,157 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import string_to_authors, MetaInformation
from calibre.utils.logging import default_log
from calibre.ptempfile import TemporaryFile

def _clean(s):
    return s.replace(u'\u00a0', u' ')

def _detag(tag):
    str = u""
    for elem in tag:
        if hasattr(elem, "contents"):
            str += _detag(elem)
        else:
            str += _clean(elem)
    return str


def _metadata_from_table(soup, searchfor):
    td = soup.find('td', text=re.compile(searchfor, flags=re.I))
    if td is None:
        return None
    td = td.parent
    # there appears to be multiple ways of structuring the metadata
    # on the home page. cue some nasty special-case hacks...
    if re.match(r'^\s*'+searchfor+r'\s*$', td.renderContents(), flags=re.I):
        meta = _detag(td.findNextSibling('td'))
        return re.sub('^:', '', meta).strip()
    else:
        meta = _detag(td)
        return re.sub(r'^[^:]+:', '', meta).strip()

def _metadata_from_span(soup, searchfor):
    span = soup.find('span', {'class': re.compile(searchfor, flags=re.I)})
    if span is None:
        return None
    # this metadata might need some cleaning up still :/
    return _detag(span.renderContents().strip())

def _get_authors(soup):
    aut = (_metadata_from_span(soup, r'author')
           or _metadata_from_table(soup, r'^\s*by\s*:?\s+'))
    ans = [_('Unknown')]
    if aut is not None:
        ans = string_to_authors(aut)
    return ans

def _get_publisher(soup):
    return (_metadata_from_span(soup, 'imprint')
            or _metadata_from_table(soup, 'publisher'))

def _get_isbn(soup):
    return (_metadata_from_span(soup, 'isbn')
            or _metadata_from_table(soup, 'isbn'))

def _get_comments(soup):
    date = (_metadata_from_span(soup, 'cwdate')
            or _metadata_from_table(soup, 'pub date'))
    pages = (_metadata_from_span(soup, 'pages')
             or _metadata_from_table(soup, 'pages'))
    try:
        # date span can have copyright symbols in it...
        date = date.replace(u'\u00a9', '').strip()
        # and pages often comes as '(\d+ pages)'
        pages = re.search(r'\d+', pages).group(0)
        return u'Published %s, %s pages.' % (date, pages)
    except:
        pass
    return None

def _get_cover(soup, rdr):
    ans = None
    try:
        ans = soup.find('img', alt=re.compile('cover', flags=re.I))['src']
    except TypeError:
        # meeehh, no handy alt-tag goodness, try some hackery
        # the basic idea behind this is that in general, the cover image
        # has a height:width ratio of ~1.25, whereas most of the nav
        # buttons are decidedly less than that.
        # what we do in this is work out that ratio, take 1.25 off it and
        # save the absolute value when we sort by this value, the smallest
        # one is most likely to be the cover image, hopefully.
        r = {}
        for img in soup('img'):
            try:
                r[abs(float(img['height'])/float(img['width'])-1.25)] = img['src']
            except KeyError:
                # interestingly, occasionally the only image without height
                # or width attrs is the cover...
                r[0] = img['src']
        l = r.keys()
        l.sort()
        ans = r[l[0]]
    # this link comes from the internal html, which is in a subdir
    if ans is not None:
        try:
            ans = rdr.GetFile(ans)
        except:
            ans = rdr.root + "/" + ans
            try:
                ans = rdr.GetFile(ans)
            except:
                ans = None
    if ans is not None:
        from PIL import Image
        from cStringIO import StringIO
        buf = StringIO()
        try:
            Image.open(StringIO(ans)).convert('RGB').save(buf, 'JPEG')
            ans = buf.getvalue()
        except:
            ans = None
    return ans


def get_metadata_from_reader(rdr):
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
                                        resolve_entities=True)[0])

    title = rdr.title
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi

def get_metadata(stream):
    with TemporaryFile('_chm_metadata.chm') as fname:
        with open(fname, 'wb') as f:
            f.write(stream.read())
        from calibre.ebooks.chm.reader import CHMReader
        rdr = CHMReader(fname, default_log)
        return get_metadata_from_reader(rdr)
212  src/calibre/ebooks/chm/reader.py  Normal file
@ -0,0 +1,212 @@
from __future__ import with_statement
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                ' and Alex Bramley <a.bramley at gmail.com>.'

import os, re
from mimetypes import guess_type as guess_mimetype

from BeautifulSoup import BeautifulSoup, NavigableString

from calibre.utils.chm.chm import CHMFile
from calibre.utils.chm.chmlib import (
    CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
    chm_enumerate,
)

from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.toc import TOC


def match_string(s1, s2_already_lowered):
    if s1 is not None and s2_already_lowered is not None:
        if s1.lower()==s2_already_lowered:
            return True
    return False

def check_all_prev_empty(tag):
    if tag is None:
        return True
    if tag.__class__ == NavigableString and not check_empty(tag):
        return False
    return check_all_prev_empty(tag.previousSibling)

def check_empty(s, rex = re.compile(r'\S')):
    return rex.search(s) is None


def option_parser():
    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
    parser.add_option("-t", "--title", action="store", type="string", \
                      dest="title", help=_("Set the book title"))
    parser.add_option('--title-sort', action='store', type='string', default=None,
                      dest='title_sort', help=_('Set sort key for the title'))
    parser.add_option("-a", "--author", action="store", type="string", \
                      dest="author", help=_("Set the author"))
    parser.add_option('--author-sort', action='store', type='string', default=None,
                      dest='author_sort', help=_('Set sort key for the author'))
    parser.add_option("-c", "--category", action="store", type="string", \
                      dest="category", help=_("The category this book belongs"
                      " to. E.g.: History"))
    parser.add_option("--thumbnail", action="store", type="string", \
                      dest="thumbnail", help=_("Path to a graphic that will be"
                      " set as this files' thumbnail"))
    parser.add_option("--comment", action="store", type="string", \
                      dest="freetext", help=_("Path to a txt file containing a comment."))
    parser.add_option("--get-thumbnail", action="store_true", \
                      dest="get_thumbnail", default=False, \
                      help=_("Extract thumbnail from LRF file"))
    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
    parser.add_option('--classification', default=None, help=_('Set the book classification'))
    parser.add_option('--creator', default=None, help=_('Set the book creator'))
    parser.add_option('--producer', default=None, help=_('Set the book producer'))
    parser.add_option('--get-cover', action='store_true', default=False,
                      help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
    parser.add_option('--bookid', action='store', type='string', default=None,
                      dest='book_id', help=_('Set book ID'))
    parser.add_option('--font-delta', action='store', type='int', default=0,
                      dest='font_delta', help=_('Set font delta'))
    return parser

class CHMError(Exception):
    pass

class CHMReader(CHMFile):
    def __init__(self, input, log):
        CHMFile.__init__(self)
        if not self.LoadCHM(input):
            raise CHMError("Unable to open CHM file '%s'"%(input,))
        self.log = log
        self._sourcechm = input
        self._contents = None
        self._playorder = 0
        self._metadata = False
        self._extracted = False

        # location of '.hhc' file, which is the CHM TOC.
        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
        self.hhc_path = self.root + ".hhc"


    def _parse_toc(self, ul, basedir=os.getcwdu()):
        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
        self._playorder += 1
        for li in ul('li', recursive=False):
            href = li.object('param', {'name': 'Local'})[0]['value']
            if href.count('#'):
                href, frag = href.split('#')
            else:
                frag = None
            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
            #print "========>", name
            toc.add_item(href, frag, name, play_order=self._playorder)
            self._playorder += 1
            if li.ul:
                child = self._parse_toc(li.ul)
                child.parent = toc
                toc.append(child)
        #print toc
        return toc


    def GetFile(self, path):
        # have to have abs paths for ResolveObject, but Contents() deliberately
        # makes them relative. So we don't have to worry, re-add the leading /.
        # note this path refers to the internal CHM structure
        if path[0] != '/':
            path = '/' + path
        res, ui = self.ResolveObject(path)
        if res != CHM_RESOLVE_SUCCESS:
            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
        size, data = self.RetrieveObject(ui)
        if size == 0:
            raise CHMError("'%s' is zero bytes in length!"%(path,))
        return data

    def ExtractFiles(self, output_dir=os.getcwdu()):
        for path in self.Contents():
            lpath = os.path.join(output_dir, path)
            self._ensure_dir(lpath)
            data = self.GetFile(path)
            with open(lpath, 'wb') as f:
                if guess_mimetype(path)[0] == ('text/html'):
                    data = self._reformat(data)
                f.write(data)
        self._extracted = True
        files = os.listdir(output_dir)
        if self.hhc_path not in files:
            for f in files:
                if f.lower() == self.hhc_path.lower():
                    self.hhc_path = f
                    break

    def _reformat(self, data):
        try:
            soup = BeautifulSoup(data)
        except UnicodeEncodeError:
            # hit some strange encoding problems...
            print "Unable to parse html for cleaning, leaving it :("
            return data
        # nuke javascript...
        [s.extract() for s in soup('script')]
        # remove forward and back nav bars from the top/bottom of each page
        # cos they really fuck with the flow of things and generally waste space
        # since we can't use [a,b] syntax to select arbitrary items from a list
        # we'll have to do this manually...
        t = soup('table')
        if t:
            if (t[0].previousSibling is None
                    or t[0].previousSibling.previousSibling is None):
                t[0].extract()
            if (t[-1].nextSibling is None
                    or t[-1].nextSibling.nextSibling is None):
                t[-1].extract()
        # for some very odd reason each page's content appears to be in a table
        # too. and this table has sub-tables for random asides... grr.

        # remove br at top of page if present after nav bars removed
        br = soup('br')
        if br:
            if check_all_prev_empty(br[0].previousSibling):
                br[0].extract()

        # some images seem to be broken in some chm's :/
        for img in soup('img'):
            try:
                # some are supposedly "relative"... lies.
                while img['src'].startswith('../'): img['src'] = img['src'][3:]
                # some have ";<junk>" at the end.
                img['src'] = img['src'].split(';')[0]
            except KeyError:
                # and some don't even have a src= ?!
                pass
        # now give back some pretty html.
        return soup.prettify()

    def Contents(self):
        if self._contents is not None:
            return self._contents
        paths = []
        def get_paths(chm, ui, ctx):
            # skip directories
            # note this path refers to the internal CHM structure
            if ui.path[-1] != '/':
                # and make paths relative
                paths.append(ui.path.lstrip('/'))
        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
        self._contents = paths
        return self._contents

    def _ensure_dir(self, path):
        dir = os.path.dirname(path)
        if not os.path.isdir(dir):
            os.makedirs(dir)

    def extract_content(self, output_dir=os.getcwdu()):
        self.ExtractFiles(output_dir=output_dir)

@ -13,6 +13,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.date import parse_date
from calibre.utils.zipfile import ZipFile
from calibre import extract, walk

DEBUG_README=u'''

@ -726,6 +727,13 @@ OptionRecommendation(name='timestamp',
            else:
                os.makedirs(out_dir)
            self.dump_oeb(ret, out_dir)
            if self.input_fmt == 'recipe':
                zf = ZipFile(os.path.join(self.opts.debug_pipeline,
                    'periodical.downloaded_recipe'), 'w')
                zf.add_dir(out_dir)
                with self.input_plugin:
                    self.input_plugin.save_download(zf)
                zf.close()

            self.log.info('Input debug saved to:', out_dir)

@ -773,6 +781,7 @@ OptionRecommendation(name='timestamp',
        self.ui_reporter(0.01, _('Converting input to HTML...'))
        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
        self.input_plugin.report_progress = ir
        with self.input_plugin:
            self.oeb = self.input_plugin(stream, self.opts,
                                         self.input_fmt, self.log,
                                         accelerators, tdir)

@ -780,7 +789,7 @@ OptionRecommendation(name='timestamp',
            self.dump_input(self.oeb, tdir)
            if self.abort_after_input_dump:
                return
        if self.input_fmt == 'recipe':
        if self.input_fmt in ('recipe', 'downloaded_recipe'):
            self.opts_to_mi(self.user_metadata)
        if not hasattr(self.oeb, 'manifest'):
            self.oeb = create_oebbook(self.log, self.oeb, self.opts,

@ -793,6 +802,8 @@ OptionRecommendation(name='timestamp',
            out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
            self.dump_oeb(self.oeb, out_dir)
            self.log('Parsed HTML written to:', out_dir)
        self.input_plugin.specialize(self.oeb, self.opts, self.log,
                                     self.output_fmt)

        pr(0., _('Running transforms on ebook...'))

@ -882,6 +893,7 @@ OptionRecommendation(name='timestamp',
        our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
        self.output_plugin.report_progress = our
        our(0., _('Creating')+' %s'%self.output_plugin.name)
        with self.output_plugin:
            self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
                                       self.opts, self.log)
        self.ui_reporter(1.)

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, re, uuid
import os, uuid
from itertools import cycle

from lxml import etree

@ -19,8 +19,7 @@ class EPUBInput(InputFormatPlugin):

    recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])

    @classmethod
    def decrypt_font(cls, key, path):
    def decrypt_font(self, key, path):
        raw = open(path, 'rb').read()
        crypt = raw[:1024]
        key = cycle(iter(key))

@ -29,13 +28,18 @@ class EPUBInput(InputFormatPlugin):
            f.write(decrypt)
            f.write(raw[1024:])

    @classmethod
    def process_encryption(cls, encfile, opf, log):
    def process_encryption(self, encfile, opf, log):
        key = None
        m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
        if m:
            key = m.group(1)
        for item in opf.identifier_iter():
            scheme = None
            for key in item.attrib.keys():
                if key.endswith('scheme'):
                    scheme = item.get(key)
            if (scheme and scheme.lower() == 'uuid') or \
                    (item.text and item.text.startswith('urn:uuid:')):
                key = str(item.text).rpartition(':')[-1]
        key = list(map(ord, uuid.UUID(key).bytes))

        try:
            root = etree.parse(encfile)
            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):

@ -46,7 +50,8 @@ class EPUBInput(InputFormatPlugin):
                    uri = cr.get('URI')
                    path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
                    if os.path.exists(path):
                        cls.decrypt_font(key, path)
                        self._encrypted_font_uris.append(uri)
                        self.decrypt_font(key, path)
            return True
        except:
            import traceback

@ -115,14 +120,17 @@ class EPUBInput(InputFormatPlugin):
        if opf is None:
            raise ValueError('%s is not a valid EPUB file'%path)

        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

        if len(parts) > 1 and parts[0]:
            delta = '/'.join(parts[:-1])+'/'
            for elem in opf.itermanifest():

@ -12,8 +12,9 @@ from urllib import unquote
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
from calibre import strftime, guess_type, prepare_string_for_xml
from calibre import strftime, guess_type, prepare_string_for_xml, CurrentDir
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import filesystem_encoding

from lxml import etree

@ -157,11 +158,9 @@ class EPUBOutput(OutputFormatPlugin):

        self.workaround_ade_quirks()
        self.workaround_webkit_quirks()
        self.workaround_sony_quirks()
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages()(oeb, opts)

        from calibre.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                max_flow_size=self.opts.flow_size*1024

@ -170,6 +169,21 @@ class EPUBOutput(OutputFormatPlugin):

        self.insert_cover()

        self.workaround_sony_quirks()

        from calibre.ebooks.oeb.base import OPF
        identifiers = oeb.metadata['identifier']
        uuid = None
        for x in identifiers:
            if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
                uuid = unicode(x).split(':')[-1]
                break
        if uuid is None:
            self.log.warn('No UUID identifier found')
            from uuid import uuid4
            uuid = str(uuid4())
            oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)

        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')

@ -177,10 +191,16 @@ class EPUBOutput(OutputFormatPlugin):
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
            self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
                    if x.endswith('.ncx')][0])
            encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
            encryption = None
            if encrypted_fonts:
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

            from calibre.ebooks.epub import initialize_container
            epub = initialize_container(output_path, os.path.basename(opf))
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', encryption)
            if opts.extract_to is not None:
                if os.path.exists(opts.extract_to):
                    shutil.rmtree(opts.extract_to)

@ -189,6 +209,52 @@ class EPUBOutput(OutputFormatPlugin):
                self.log.info('EPUB extracted to', opts.extract_to)
            epub.close()

    def encrypt_fonts(self, uris, tdir, uuid):
        from binascii import unhexlify

        key = re.sub(r'[^a-fA-F0-9]', '', uuid)
        if len(key) < 16:
            raise ValueError('UUID identifier %r is invalid'%uuid)
        key = unhexlify((key + key)[:32])
        key = tuple(map(ord, key))
        paths = []
        with CurrentDir(tdir):
            paths = [os.path.join(*x.split('/')) for x in uris]
            uris = dict(zip(uris, paths))
            fonts = []
            for uri in list(uris.keys()):
                path = uris[uri]
                if isinstance(path, unicode):
                    path = path.encode(filesystem_encoding)
                if not os.path.exists(path):
                    uris.pop(uri)
                    continue
                self.log.debug('Encrypting font:', uri)
                with open(path, 'r+b') as f:
                    data = f.read(1024)
                    f.seek(0)
                    for i in range(1024):
                        f.write(chr(ord(data[i]) ^ key[i%16]))
                if not isinstance(uri, unicode):
                    uri = uri.decode('utf-8')
                fonts.append(u'''
        <enc:EncryptedData>
            <enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>
            <enc:CipherData>
                <enc:CipherReference URI="%s"/>
            </enc:CipherData>
        </enc:EncryptedData>
                '''%(uri.replace('"', '\\"')))
            if fonts:
                ans = '''<encryption
                    xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
                    xmlns:enc="http://www.w3.org/2001/04/xmlenc#"
                    xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">
                    '''
                ans += (u'\n'.join(fonts)).encode('utf-8')
                ans += '\n</encryption>'
                return ans

    def default_cover(self):
        '''
        Create a generic cover for books that dont have a cover

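# --- Illustrative sketch (not part of the commit): the XOR scheme that
# decrypt_font() and encrypt_fonts() above share. The same operation both
# obfuscates and deobfuscates: only the first 1024 bytes of the font are
# XORed with a 16-byte key derived from the book's UUID; the rest is left
# untouched. The helper name is hypothetical.
from binascii import unhexlify
import re

def xor_font_data(raw, uuid):
    # build the 16-byte key exactly as encrypt_fonts() does
    key = re.sub(r'[^a-fA-F0-9]', '', uuid)
    key = map(ord, unhexlify((key + key)[:32]))
    head = ''.join(chr(ord(raw[i]) ^ key[i % 16]) for i in range(min(1024, len(raw))))
    return head + raw[1024:]
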
@ -20,7 +20,7 @@ from itertools import izip
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux
from calibre.constants import islinux, isfreebsd
from calibre import unicode_path
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename

@ -346,7 +346,7 @@ class HTMLInput(InputFormatPlugin):
        self.added_resources = {}
        self.log = log
        for path, href in htmlfile_map.items():
            if not islinux:
            if not (islinux or isfreebsd):
                path = path.lower()
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer

@ -417,7 +417,7 @@ class HTMLInput(InputFormatPlugin):
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not islinux:
        if not (islinux or isfreebsd):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)

@ -215,6 +215,28 @@ def merge_results(one, two):
        else:
            one[idx].smart_update(x)

class MetadataSources(object):

    def __init__(self, sources):
        self.sources = sources

    def __enter__(self):
        for s in self.sources:
            s.__enter__()
        return self

    def __exit__(self, *args):
        for s in self.sources:
            s.__exit__()

    def __call__(self, *args, **kwargs):
        for s in self.sources:
            s(*args, **kwargs)

    def join(self):
        for s in self.sources:
            s.join()

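# --- Illustrative sketch (not part of the commit): what the MetadataSources
# wrapper above buys the two call sites below - a single with-block instead
# of the old pairs of fetcher loops. The arguments are placeholders.
from calibre.customize.ui import metadata_sources

fetchers = list(metadata_sources(isbndb_key=None))
with MetadataSources(fetchers) as manager:
    manager('Some Title', 'Some Author', None, None, 0)
    manager.join()
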
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
           verbose=0):
    assert not(title is None and author is None and publisher is None and \

@ -224,11 +246,10 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
    if isbn is not None:
        isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
    fetchers = list(metadata_sources(isbndb_key=isbndb_key))
    with MetadataSources(fetchers) as manager:
        manager(title, author, publisher, isbn, verbose)
        manager.join()

    for fetcher in fetchers:
        fetcher(title, author, publisher, isbn, verbose)
    for fetcher in fetchers:
        fetcher.join()
    results = list(fetchers[0].results)
    for fetcher in fetchers[1:]:
        merge_results(results, fetcher.results)

@ -243,10 +264,9 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
def get_social_metadata(mi, verbose=0):
    from calibre.customize.ui import metadata_sources
    fetchers = list(metadata_sources(metadata_type='social'))
    for fetcher in fetchers:
        fetcher(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
    for fetcher in fetchers:
        fetcher.join()
    with MetadataSources(fetchers) as manager:
        manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
        manager.join()
    ratings, tags, comments = [], set([]), set([])
    for fetcher in fetchers:
        if fetcher.results:

@ -70,6 +70,17 @@ def is_recipe(filename):
            filename.rpartition('.')[0].endswith('_recipe_out')

def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False):
    pos = 0
    if hasattr(stream, 'tell'):
        pos = stream.tell()
    try:
        return _get_metadata(stream, stream_type, use_libprs_metadata)
    finally:
        if hasattr(stream, 'seek'):
            stream.seek(pos)


def _get_metadata(stream, stream_type, use_libprs_metadata):
    if stream_type: stream_type = stream_type.lower()
    if stream_type in ('html', 'html', 'xhtml', 'xhtm', 'xml'):
        stream_type = 'html'

@ -97,9 +97,14 @@ class MetadataUpdater(object):

        self.nrecs, = unpack('>H', data[76:78])
        record0 = self.record0 = self.record(0)
        mobi_header_length, = unpack('>I', record0[0x14:0x18])
        if not mobi_header_length:
            raise MobiError("Non-standard file format. Try 'Convert E-Books' with MOBI as Input and Output formats.")

        self.encryption_type, = unpack('>H', record0[12:14])
        codepage, = unpack('>I', record0[28:32])
        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'

        image_base, = unpack('>I', record0[108:112])
        flags, = self.flags, = unpack('>I', record0[128:132])
        have_exth = self.have_exth = (flags & 0x40) != 0

@ -306,9 +311,10 @@ class MetadataUpdater(object):
        return StreamSlicer(self.stream, start, stop)

    def update(self, mi):
        def pop_exth_record(exth_id):
            if exth_id in self.original_exth_records:
                self.original_exth_records.pop(exth_id)
        def update_exth_record(rec):
            recs.append(rec)
            if rec[0] in self.original_exth_records:
                self.original_exth_records.pop(rec[0])

        if self.type != "BOOKMOBI":
            raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n"

@ -323,47 +329,36 @@ class MetadataUpdater(object):
            pas = False
        if mi.author_sort and pas:
            authors = mi.author_sort
            recs.append((100, authors.encode(self.codec, 'replace')))
            pop_exth_record(100)
            update_exth_record((100, authors.encode(self.codec, 'replace')))
        elif mi.authors:
            authors = '; '.join(mi.authors)
            recs.append((100, authors.encode(self.codec, 'replace')))
            pop_exth_record(100)
            update_exth_record((100, authors.encode(self.codec, 'replace')))
        if mi.publisher:
            recs.append((101, mi.publisher.encode(self.codec, 'replace')))
            pop_exth_record(101)
            update_exth_record((101, mi.publisher.encode(self.codec, 'replace')))
        if mi.comments:
            recs.append((103, mi.comments.encode(self.codec, 'replace')))
            pop_exth_record(103)
            update_exth_record((103, mi.comments.encode(self.codec, 'replace')))
        if mi.isbn:
            recs.append((104, mi.isbn.encode(self.codec, 'replace')))
            pop_exth_record(104)
            update_exth_record((104, mi.isbn.encode(self.codec, 'replace')))
        if mi.tags:
            subjects = '; '.join(mi.tags)
            recs.append((105, subjects.encode(self.codec, 'replace')))
            pop_exth_record(105)
            update_exth_record((105, subjects.encode(self.codec, 'replace')))
        if mi.pubdate:
            recs.append((106, str(mi.pubdate).encode(self.codec, 'replace')))
            pop_exth_record(106)
            update_exth_record((106, str(mi.pubdate).encode(self.codec, 'replace')))
        elif mi.timestamp:
            recs.append((106, str(mi.timestamp).encode(self.codec, 'replace')))
            pop_exth_record(106)
            update_exth_record((106, str(mi.timestamp).encode(self.codec, 'replace')))
        elif self.timestamp:
            recs.append((106, self.timestamp))
            pop_exth_record(106)
            update_exth_record((106, self.timestamp))
        else:
            recs.append((106, nowf().isoformat().encode(self.codec, 'replace')))
            pop_exth_record(106)
            update_exth_record((106, nowf().isoformat().encode(self.codec, 'replace')))
        if self.cover_record is not None:
            recs.append((201, pack('>I', self.cover_rindex)))
            recs.append((203, pack('>I', 0)))
            pop_exth_record(201)
            pop_exth_record(203)
            update_exth_record((201, pack('>I', self.cover_rindex)))
            update_exth_record((203, pack('>I', 0)))
        if self.thumbnail_record is not None:
            recs.append((202, pack('>I', self.thumbnail_rindex)))
            pop_exth_record(202)
            update_exth_record((202, pack('>I', self.thumbnail_rindex)))
        if 503 in self.original_exth_records:
            update_exth_record((503, mi.title.encode(self.codec, 'replace')))

        # Restore any original EXTH fields that weren't updated
        # Include remaining original EXTH fields
        for id in sorted(self.original_exth_records):
            recs.append((id, self.original_exth_records[id]))
        recs = sorted(recs, key=lambda x:(x[0],x[0]))

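# --- Illustrative sketch (not part of the commit): the net effect of
# update_exth_record() above on a toy EXTH table. Updated ids are queued in
# recs and dropped from the originals, so the later loop that re-appends
# self.original_exth_records cannot duplicate them.
recs = []
original_exth_records = {100: 'Old Author', 503: 'Old Title'}

def update_exth_record(rec):
    recs.append(rec)
    if rec[0] in original_exth_records:
        original_exth_records.pop(rec[0])

update_exth_record((100, 'New Author'))
# recs == [(100, 'New Author')]; original_exth_records == {503: 'Old Title'}
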
@ -779,6 +779,9 @@ class OPF(object):
            self.set_text(matches[0], unicode(val))
        return property(fget=fget, fset=fset)

    def identifier_iter(self):
        for item in self.identifier_path(self.metadata):
            yield item

    def guess_cover(self):
        '''

@@ -8,9 +8,10 @@ Read metadata from RAR archives
 '''
 
 import os
 from cStringIO import StringIO
-from calibre.ptempfile import PersistentTemporaryFile
 
+from calibre.ptempfile import PersistentTemporaryFile, TemporaryDirectory
 from calibre.libunrar import extract_member, names
+from calibre import CurrentDir
 
 def get_metadata(stream):
     from calibre.ebooks.metadata.archive import is_comic
@@ -32,8 +33,10 @@ def get_metadata(stream):
         stream_type = stream_type[1:]
         if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                            'rb', 'imp', 'pdf', 'lrf'):
-            data = extract_member(path, match=None, name=f)[1]
-            stream = StringIO(data)
+            with TemporaryDirectory() as tdir:
+                with CurrentDir(tdir):
+                    stream = extract_member(path, match=None, name=f,
+                            as_file=True)[1]
             return get_metadata(stream, stream_type)
     raise ValueError('No ebook found in RAR archive')
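
Note: this hunk (and the ZIP one below) stops reading the archive member into a StringIO and instead extracts it as a real file inside a scratch working directory, since some downstream metadata readers need a named, seekable file. A rough standalone sketch of the two context managers the diff relies on (stand-ins for calibre's TemporaryDirectory and CurrentDir, names illustrative):

    import contextlib, os, shutil, tempfile

    @contextlib.contextmanager
    def current_dir(path):
        # Minimal stand-in for calibre.CurrentDir: enter `path`, always
        # restore the previous working directory on exit.
        prev = os.getcwd()
        os.chdir(path)
        try:
            yield path
        finally:
            os.chdir(prev)

    @contextlib.contextmanager
    def temporary_directory(suffix=''):
        # Minimal stand-in for calibre.ptempfile.TemporaryDirectory.
        tdir = tempfile.mkdtemp(suffix)
        try:
            yield tdir
        finally:
            shutil.rmtree(tdir, ignore_errors=True)

    # Usage mirroring the diff: extract the member into a scratch cwd so the
    # metadata reader receives a real, named file.
    with temporary_directory('_rar_meta') as tdir:
        with current_dir(tdir):
            pass  # extract_member(path, name=f, as_file=True) would run here
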
@@ -149,7 +149,8 @@ class TOC(list):
 
     def read_ncx_toc(self, toc):
         self.base_path = os.path.dirname(toc)
-        soup = NCXSoup(xml_to_unicode(open(toc, 'rb').read())[0])
+        raw = xml_to_unicode(open(toc, 'rb').read(), assume_utf8=True)[0]
+        soup = NCXSoup(raw)
 
         def process_navpoint(np, dest):
             play_order = np.get('playOrder', None)
@@ -160,7 +161,7 @@ class TOC(list):
             if nl is not None:
                 text = u''
                 for txt in nl.findAll(re.compile('text')):
-                    text += ''.join([unicode(s) for s in txt.findAll(text=True)])
+                    text += u''.join([unicode(s) for s in txt.findAll(text=True)])
                 content = np.find(re.compile('content'))
                 if content is None or not content.has_key('src') or not txt:
                     return
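
Note: passing assume_utf8=True implements the 0.6.44 changelog entry (ticket 5039): when an NCX declares no encoding and detection is not fully confident, fall back to UTF-8. A rough sketch of that fallback policy, with the third-party chardet module standing in for calibre's detector (calibre's xml_to_unicode also honours declared encodings and BOMs first):

    def decode_ncx(raw, confidence_threshold=1.0):
        # Trust a detected encoding only when detection is certain;
        # otherwise assume UTF-8, which is the common case for NCX.
        import chardet
        guess = chardet.detect(raw)
        enc = guess['encoding'] or 'utf-8'
        if guess['confidence'] < confidence_threshold:
            enc = 'utf-8'
        return raw.decode(enc, 'replace')
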
@@ -43,6 +43,8 @@ def read_metadata_(task, tdir, notification=lambda x,y:x):
     import_map = {}
     for format in formats:
         nfp = run_plugins_on_import(format)
+        if nfp is None:
+            nfp = format
         nfp = os.path.abspath(nfp)
         if isinstance(nfp, unicode):
             nfp.encode(filesystem_encoding)
@@ -3,9 +3,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 
 import os
-from zipfile import ZipFile
-from cStringIO import StringIO
 
+from calibre.utils.zipfile import ZipFile
+from calibre.ptempfile import TemporaryDirectory
+from calibre import CurrentDir
 
 def get_metadata(stream):
     from calibre.ebooks.metadata.meta import get_metadata
@@ -23,8 +24,10 @@ def get_metadata(stream):
         stream_type = stream_type[1:]
         if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                            'rb', 'imp', 'pdf', 'lrf'):
-            stream = StringIO(zf.read(f))
-            return get_metadata(stream, stream_type)
+            with TemporaryDirectory() as tdir:
+                with CurrentDir(tdir):
+                    path = zf.extract(f)
+                    return get_metadata(open(path, 'rb'), stream_type)
     raise ValueError('No ebook found in ZIP archive')
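
Note: same idea as the RAR change: zf.extract(f) inside a temporary working directory yields a real file whose name and seekability downstream readers can rely on, where a StringIO has neither. A stdlib-only sketch of the overall shape (get_metadata here is a hypothetical callback, not calibre's):

    import tempfile, zipfile

    def metadata_from_zip(path, get_metadata, exts=('epub', 'mobi', 'pdf')):
        # Pull the first recognised ebook out of a ZIP and hand the
        # reader an actual file on disk rather than an in-memory buffer.
        with zipfile.ZipFile(path) as zf:
            for name in zf.namelist():
                ext = name.rpartition('.')[-1].lower()
                if ext in exts:
                    tdir = tempfile.mkdtemp()
                    extracted = zf.extract(name, tdir)
                    with open(extracted, 'rb') as stream:
                        return get_metadata(stream, ext)
        raise ValueError('No ebook found in ZIP archive')
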
||||
|
@ -154,7 +154,7 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
MobiWriter, PALMDOC, UNCOMPRESSED
|
||||
from calibre.ebooks.mobi.mobiml import MobiMLizer
|
||||
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
||||
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
||||
from calibre.customize.ui import plugin_for_input_format
|
||||
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
||||
@ -163,8 +163,11 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
tocadder(oeb, opts)
|
||||
mangler = CaseMangler()
|
||||
mangler(oeb, opts)
|
||||
try:
|
||||
rasterizer = SVGRasterizer()
|
||||
rasterizer(oeb, opts)
|
||||
except Unavailable:
|
||||
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
|
||||
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
||||
mobimlizer(oeb, opts)
|
||||
self.check_for_periodical()
|
||||
|
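
Note: together with the rasterize.py hunk further down, this turns a hard failure (no Qt available) into a skippable pipeline stage: the rasterizer raises a dedicated Unavailable exception and the MOBI pipeline logs a warning instead of aborting. The pattern in isolation (names mirror the diff; the transform itself is stubbed):

    import sys

    class Unavailable(Exception):
        """Raised by an optional pipeline stage that cannot run here."""

    class SVGRasterizer(object):
        def __init__(self):
            qt_ok = False  # stand-in for calibre's is_ok_to_use_qt() probe
            if not qt_ok:
                raise Unavailable('Not OK to use Qt')

        def __call__(self, book):
            pass  # the real transform would rasterize SVG images in `book`

    def run_pipeline(book, log):
        try:
            rasterizer = SVGRasterizer()
            rasterizer(book)
        except Unavailable:
            # Optional stage: degrade gracefully instead of failing the
            # whole conversion; SVG images simply pass through unconverted.
            log('SVG rasterizer unavailable, SVG will not be converted')

    run_pipeline(object(), lambda msg: sys.stdout.write(msg + '\n'))
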
@@ -4,12 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''
 
-import functools
-import os
-import re
-import struct
-import textwrap
-import cStringIO
+import functools, shutil, os, re, struct, textwrap, cStringIO, sys
 
 try:
     from PIL import Image as PILImage
@@ -619,6 +614,16 @@ class MobiReader(object):
                         * opf.cover.split('/'))):
                 opf.cover = None
 
+        cover = opf.cover
+        if cover is not None:
+            cover = cover.replace('/', os.sep)
+            if os.path.exists(cover):
+                ncover = 'images'+os.sep+'calibre_cover.jpg'
+                if os.path.exists(ncover):
+                    os.remove(ncover)
+                shutil.copyfile(cover, ncover)
+                opf.cover = ncover.replace(os.sep, '/')
+
         manifest = [(htmlfile, 'application/xhtml+xml'),
             (os.path.abspath('styles.css'), 'text/css')]
         bp = os.path.dirname(htmlfile)
@@ -796,15 +801,22 @@ class MobiReader(object):
 def get_metadata(stream):
     from calibre.utils.logging import Log
     log = Log()
+    mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
     try:
         mh = MetadataHeader(stream, log)
         if mh.title and mh.title != _('Unknown'):
             mi.title = mh.title
 
         if mh.exth is not None:
             if mh.exth.mi is not None:
                 mi = mh.exth.mi
         else:
+            size = sys.maxint
+            if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
+                pos = stream.tell()
+                stream.seek(0, 2)
+                size = stream.tell()
+                stream.seek(pos)
+            if size < 4*1024*1024:
                 with TemporaryDirectory('_mobi_meta_reader') as tdir:
                     with CurrentDir(tdir):
                         mr = MobiReader(stream, log)
@@ -818,10 +830,12 @@ def get_metadata(stream):
         else:
             data = mh.section_data(mh.first_image_index)
             buf = cStringIO.StringIO(data)
             try:
                 im = PILImage.open(buf)
-                obuf = cStringIO.StringIO()
-                im.convert('RGBA').save(obuf, format='JPEG')
-                mi.cover_data = ('jpg', obuf.getvalue())
             except:
-                log.exception()
+                log.exception('Failed to read MOBI cover')
+            else:
+                obuf = cStringIO.StringIO()
+                im.convert('RGB').save(obuf, format='JPEG')
+                mi.cover_data = ('jpg', obuf.getvalue())
     return mi
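
Note: the get_metadata change only attempts a full MobiReader parse (to recover an embedded cover) when the stream is under 4MB, measuring the size non-destructively with seek/tell. That measuring idiom on its own:

    import io

    def stream_size(stream, default=float('inf')):
        # Probe from the diff: jump to the end, record the offset,
        # then restore the caller's position.
        if not (hasattr(stream, 'seek') and hasattr(stream, 'tell')):
            return default
        pos = stream.tell()
        stream.seek(0, 2)      # 2 == os.SEEK_END
        size = stream.tell()
        stream.seek(pos)
        return size

    buf = io.BytesIO(b'x' * 1024)
    assert stream_size(buf) == 1024
    if stream_size(buf) < 4 * 1024 * 1024:
        pass  # cheap enough: safe to run the expensive full parse
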
@@ -152,13 +152,17 @@ class EbookIterator(object):
                 prints('Substituting font family: %s -> %s'%(bad, good))
             return match.group().replace(bad, '"%s"'%good)
 
+        from calibre.ebooks.chardet import force_encoding
         for csspath in css_files:
             with open(csspath, 'r+b') as f:
                 css = f.read()
-                css = font_family_pat.sub(prepend_embedded_font, css)
+                enc = force_encoding(css, False)
+                css = css.decode(enc, 'replace')
+                ncss = font_family_pat.sub(prepend_embedded_font, css)
+                if ncss != css:
                     f.seek(0)
                     f.truncate()
-                    f.write(css)
+                    f.write(ncss.encode(enc))
 
     def __enter__(self, processed=False):
         self.delete_on_exit = []
@@ -173,11 +177,12 @@ class EbookIterator(object):
         plumber.opts.no_process = True
 
+        plumber.input_plugin.for_viewer = True
         with plumber.input_plugin:
             self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
                     plumber.opts, plumber.input_fmt, self.log,
                     {}, self.base)
 
-        if processed or plumber.input_fmt.lower() in ('pdf', 'rb') and \
+        if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
                 not hasattr(self.pathtoopf, 'manifest'):
             self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                     plumber.input_plugin)
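
Note: the viewer fix above decodes each CSS file with a detected encoding before running the font-family substitution (the 0.6.44 "non-ascii CSS" bug fix), and only rewrites the file when the substitution actually changed something, re-encoding with the same charset so non-ASCII stylesheets round-trip intact. A compact sketch of that read/transform/conditional-write-back cycle (detect_encoding is a hypothetical stand-in for calibre's force_encoding):

    def rewrite_css(path, transform, detect_encoding=lambda raw: 'utf-8'):
        # Open read/write in binary, decode, transform as text, and write
        # back only if the transform changed anything, in the same encoding.
        with open(path, 'r+b') as f:
            raw = f.read()
            enc = detect_encoding(raw)
            css = raw.decode(enc, 'replace')
            ncss = transform(css)
            if ncss != css:
                f.seek(0)
                f.truncate()
                f.write(ncss.encode(enc))
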
@@ -331,7 +331,10 @@ class OEBReader(object):
             id = child.get('id')
             klass = child.get('class', 'chapter')
 
-            po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
+            try:
+                po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
+            except:
+                po = self.oeb.toc.next_play_order()
 
             authorElement = xpath(child,
                     'descendant::calibre:meta[@name = "author"]')
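
Note: this mirrors the 0.6.44 changelog item about ignoring non-integral play orders when parsing NCX files: int() over attribute content can raise on junk, so the reader falls back to the next generated play order. The same defensive parse in isolation (next_play_order is a hypothetical callable):

    def parse_play_order(attr_value, next_play_order):
        # NCX files in the wild carry playOrder values like "c1" or empty
        # strings; treat anything non-integral as "assign the next one".
        try:
            return int(attr_value if attr_value is not None
                       else next_play_order())
        except (TypeError, ValueError):
            return next_play_order()

    counter = iter(range(1, 100))
    nxt = lambda: next(counter)
    assert parse_play_order('7', nxt) == 7
    assert parse_play_order('chapter-2', nxt) == 1
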
@@ -190,11 +190,11 @@ class Stylizer(object):
             selector = CSSSelector(ntext)
             matches = selector(tree)
 
-            if not matches and class_sel_pat.match(text):
+            if not matches and class_sel_pat.match(text) and text.lower() != text:
                 found = False
+                ltext = text.lower()
                 for x in tree.xpath('//*[@class]'):
-                    if text.lower().endswith('.'+x.get('class').lower()) and \
-                            text.lower() != text:
+                    if ltext.endswith('.'+x.get('class').lower()):
                         matches.append(x)
                         found = True
                 if found:
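
Note: the Stylizer tweak only takes the case-insensitive fallback path when the selector actually contains uppercase (text.lower() != text), and hoists the lowercasing out of the element loop. The fallback idea by itself, assuming lxml-style elements with a .get() accessor (class_sel_pat's real pattern lives in calibre; the one below is an assumption):

    import re

    class_sel_pat = re.compile(r'^\w+\.\w+$')  # assumed "tag.Class" shape

    def fallback_class_matches(text, elements):
        # Retry a failed class selector case-insensitively, but only when
        # it could matter, i.e. the selector has uppercase in it.
        if not class_sel_pat.match(text) or text.lower() == text:
            return []
        ltext = text.lower()  # lowercase once, outside the loop
        return [el for el in elements
                if ltext.endswith('.' + el.get('class', '').lower())]
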
@@ -27,11 +27,14 @@ from calibre.ebooks.oeb.stylizer import Stylizer
 IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
 KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align'])
 
+class Unavailable(Exception):
+    pass
+
 class SVGRasterizer(object):
     def __init__(self):
         from calibre.gui2 import is_ok_to_use_qt
         if not is_ok_to_use_qt():
-            raise Exception('Not OK to use Qt')
+            raise Unavailable('Not OK to use Qt')
 
     @classmethod
     def config(cls, cfg):
@@ -29,7 +29,7 @@ class RescaleImages(object):
 
         page_width, page_height = self.opts.dest.width, self.opts.dest.height
-        if not self.opts.is_image_collection:
+        if not getattr(self.opts, 'is_image_collection', False):
             page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
             page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
         for item in self.oeb.manifest:
@@ -11,12 +11,14 @@ class PDBError(Exception):
 from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
+from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
 
 FORMAT_READERS = {
     'PNPdPPrs': ereader_reader,
     'PNRdPPrs': ereader_reader,
     'zTXTGPlm': ztxt_reader,
     'TEXtREAd': palmdoc_reader,
+    '.pdfADBE': pdf_reader,
 }
 
 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@@ -34,8 +36,8 @@ IDENTITY_TO_NAME = {
     'PNRdPPrs': 'eReader',
     'zTXTGPlm': 'zTXT',
     'TEXtREAd': 'PalmDOC',
-
+    '.pdfADBE': 'Adobe Reader',
 
     'BVokBDIC': 'BDicty',
     'DB99DBOS': 'DB (Database program)',
     'vIMGView': 'FireViewer (ImageViewer)',
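
Note: the FORMAT_READERS keys are the 8-byte type/creator identity stored at offset 60 of a Palm database header; registering '.pdfADBE' wires the new PDF-in-PDB reader into that dispatch. A bare-bones version of the lookup (format_readers is any mapping shaped like the one in the diff):

    def pdb_identity(stream):
        # The PalmDB header stores 4 bytes of type and 4 bytes of creator
        # at offset 60; together they identify the payload format.
        stream.seek(60)
        return stream.read(8).decode('ascii', 'replace')

    def reader_for(stream, format_readers):
        ident = pdb_identity(stream)
        try:
            return format_readers[ident]
        except KeyError:
            raise ValueError('Unknown PDB identity: %r' % ident)
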
New files added in this commit:
src/calibre/ebooks/pdb/pdf/__init__.py (new file, 0 lines)
src/calibre/ebooks/pdb/pdf/reader.py (new file, 37 lines)
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from palmdoc pdb file.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ptempfile import TemporaryFile
+
+class Reader(FormatReader):
+
+    def __init__(self, header, stream, log, options):
+        self.header = header
+        self.stream = stream
+        self.log = log
+        self.options = options
+        setattr(self.options, 'new_pdf_engine', False)
+        setattr(self.options, 'no_images', False)
+        setattr(self.options, 'unwrap_factor', 0.5)
+
+    def extract_content(self, output_dir):
+        self.log.info('Extracting PDF...')
+
+        with TemporaryFile() as pdf_n:
+            pdf = open(pdf_n, 'rwb')
+            for x in xrange(self.header.section_count()):
+                pdf.write(self.header.section_data(x))
+
+            from calibre.customize.ui import plugin_for_input_format
+            pdf.seek(0)
+            return plugin_for_input_format('pdf').convert(pdf, self.options,
+                'pdf', self.log, [])
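
Note: the new reader reassembles the embedded PDF by concatenating every PDB record in order and then hands the rebuilt file to the regular PDF input plugin. The reassembly step on its own (header stands in for calibre's PDB header reader, assumed to expose section_count/section_data as in the diff):

    import tempfile

    def rebuild_payload(header):
        # Concatenate all PDB record sections into one temporary file and
        # return it rewound, ready for a downstream parser.
        out = tempfile.TemporaryFile()
        for i in range(header.section_count()):
            out.write(header.section_data(i))
        out.seek(0)
        return out
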
@@ -13,7 +13,7 @@ from functools import partial
 
 from calibre.ebooks import ConversionError, DRMError
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre import isosx, iswindows, islinux
+from calibre import isosx, iswindows, islinux, isfreebsd
 from calibre import CurrentDir
 
 PDFTOHTML = 'pdftohtml'
@@ -23,7 +23,7 @@ if isosx and hasattr(sys, 'frameworks_dir'):
 if iswindows and hasattr(sys, 'frozen'):
     PDFTOHTML = os.path.join(os.path.dirname(sys.executable), 'pdftohtml.exe')
     popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
-if islinux and getattr(sys, 'frozen_path', False):
+if (islinux or isfreebsd) and getattr(sys, 'frozen_path', False):
     PDFTOHTML = os.path.join(getattr(sys, 'frozen_path'), 'pdftohtml')
 
 def pdftohtml(output_dir, pdf_path, no_images):
@@ -72,14 +72,14 @@ class PML_HTMLizer(object):
         'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
         'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
         'r': ('<div style="text-align: right;">', '</div>'),
-        't': ('<div style="margin-left: 5%;">', '</div>'),
-        'T': ('<div style="margin-left: %s;">', '</div>'),
+        't': ('<div style="text-indent: 5%;">', '</div>'),
+        'T': ('<div style="text-indent: %s;">', '</div>'),
         'i': ('<span style="font-style: italic;">', '</span>'),
         'u': ('<span style="text-decoration: underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
         'b': ('<span style="font-weight: bold;">', '</span>'),
         'l': ('<span style="font-size: 150%;">', '</span>'),
-        'k': ('<span style="font-size: 75%;">', '</span>'),
+        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
         'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><<small><a href="#rfn-%s">return</a></small></div>'),
         'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
     }
@@ -154,6 +154,11 @@ class PML_HTMLizer(object):
         self.file_name = ''
 
     def prepare_pml(self, pml):
+        # Give Chapters the form \*='text'text\*. This is used for generating
+        # the TOC later.
+        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
+
         # Remove comments
         pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)
@@ -163,7 +168,7 @@ class PML_HTMLizer(object):
         pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
         pml = re.sub(r'(?mus)^[ ]*$', '', pml)
 
-        # Footnotes and Sidebars
+        # Footnotes and Sidebars.
         pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
         pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
@@ -171,9 +176,7 @@ class PML_HTMLizer(object):
         # &. It will display as &amp;
         pml = pml.replace('&', '&amp;')
 
-        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
-        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', lambda match: '="%s"%s' % (self.strip_pml(match.group('text')), match.group('text')), pml)
-
         # Replace \a and \U with either the unicode character or the entity.
         pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
         pml = re.sub(r'\\U(?P<num>[0-9a-f]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)
@@ -536,6 +539,7 @@ class PML_HTMLizer(object):
             elif '%s%s' % (c, l) == 'Sd':
                 text = self.process_code('Sd', line, 'sb')
             elif c in 'xXC':
+                empty = False
                 # The PML was modified eariler so x and X put the text
                 # inside of ="" so we don't have do special processing
                 # for C.
@@ -578,9 +582,6 @@ class PML_HTMLizer(object):
             else:
                 if c != ' ':
                     empty = False
-                if self.state['k'][0]:
-                    text = c.upper()
-                else:
-                    text = c
+                text = c
                 parsed.append(text)
             c = line.read(1)
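
Note: prepare_pml now normalizes chapter codes at the start of the pipeline: lookbehind/lookahead regexes find the text between paired \x (or \X0 through \X4) markers and prefix it with ="plain text", so the TOC generator can later read the chapter label without re-stripping markup. The lookaround trick in miniature (strip_pml stubbed as identity; calibre's real version strips nested PML codes):

    import re

    def strip_pml(text):
        return text  # stand-in; calibre strips nested PML codes here

    pml = r'\xChapter \iOne\i\x'
    # Between a leading and trailing \x marker, inject ="label" before the text.
    out = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)',
                 lambda m: '="%s"%s' % (strip_pml(m.group('text')),
                                        m.group('text')),
                 pml)
    assert out == r'\x="Chapter \iOne\i"Chapter \iOne\i\x'
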
@@ -131,7 +131,7 @@ class PMLMLizer(object):
             if item.href in self.link_hrefs.keys():
                 toc.append('* \\q="#%s"%s\\q\n' % (self.link_hrefs[item.href], item.title))
             else:
-                self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
+                self.oeb_book.warn('Ignoring toc item: %s not found in document.' % item)
         return ''.join(toc)
 
     def get_text(self):
@@ -131,9 +131,9 @@ class RtfTokenParser():
             if isString(self.tokens[i].name, "\\'"):
                 i = i + 1
                 if not isinstance(self.tokens[i], tokenData):
-                    raise BaseException('Error: token8bitChar without data.')
+                    raise Exception('Error: token8bitChar without data.')
                 if len(self.tokens[i].data) < 2:
-                    raise BaseException('Error: token8bitChar without data.')
+                    raise Exception('Error: token8bitChar without data.')
                 newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
                 if len(self.tokens[i].data) > 2:
                     newTokens.append(tokenData(self.tokens[i].data[2:]))
@@ -195,7 +195,7 @@ class RtfTokenParser():
                     i = i + 1
                     j = j + 1
                     continue
-                raise BaseException('Error: incorect utf replacement.')
+                raise Exception('Error: incorect utf replacement.')
 
             #calibre rtf2xml does not support utfreplace
             replace = []
@@ -248,7 +248,7 @@ class RtfTokenizer():
 
             if isChar(self.rtfData[i], '\\'):
                 if i + 1 >= len(self.rtfData):
-                    raise BaseException('Error: Control character found at the end of the document.')
+                    raise Exception('Error: Control character found at the end of the document.')
 
                 if lastDataStart > -1:
                     self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
@@ -269,7 +269,7 @@ class RtfTokenizer():
                     i = i + 1
 
                 if not consumed:
-                    raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
+                    raise Exception('Error (at:%d): Control Word without end.'%(tokenStart))
 
                 #we have numeric argument before delimiter
                 if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
@@ -283,10 +283,10 @@ class RtfTokenizer():
                         l = l + 1
                         i = i + 1
                         if l > 10 :
-                            raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
+                            raise Exception('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
 
                     if not consumed:
-                        raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
+                        raise Exception('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
 
                     separator = ''
                     if isChar(self.rtfData[i], ' '):
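
Note: the RTF tokenizer hunks swap BaseException for Exception throughout. Raising BaseException is an anti-pattern: it sits above SystemExit and KeyboardInterrupt in the hierarchy, so ordinary `except Exception` handlers upstream would never see these parse errors, while handlers broad enough to catch them would also swallow interrupts. A tiny illustration (parse is a toy, not the calibre tokenizer):

    def parse(raw):
        if '\\' not in raw:
            # Application errors should derive from Exception, not
            # BaseException, so ordinary handlers can catch them.
            raise Exception('Error: no control words found.')
        return raw.split('\\')

    try:
        parse('plain text')
    except Exception as e:  # would never fire if parse raised BaseException
        handled = str(e)

    assert handled == 'Error: no control words found.'
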
Some files were not shown because too many files have changed in this diff.