mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Merge from trunk
This commit is contained in:
commit
93d1fc23e0
196
Changelog.yaml
196
Changelog.yaml
@ -19,6 +19,202 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.8.24
|
||||
date: 2011-10-27
|
||||
|
||||
new features:
|
||||
- title: "Kobo: Add support for fetching annotations from the kobo reader."
|
||||
description: "Right click the send to device button in calibre with your kobo connected and choose fetch annotations. The annotations are placed into the comments of the corresponding books in the calibre library. This feature is still experimental."
|
||||
type: major
|
||||
|
||||
- title: "Preserve the set of selected books in the library view when a device is connected, fixing a long standing annoyance"
|
||||
|
||||
bug fixes:
|
||||
- title: "Prevent changing of device metadata management option while a device is connected."
|
||||
tickets: [874118]
|
||||
|
||||
- title: "Book details panel: Show tooltip only when hovering over cover, not the rest of the book information, as it makes it hard to read."
|
||||
tickets: [876454]
|
||||
|
||||
- title: "MOBI Output: Fix use of list elements as link anchors caused links to always point to start of list."
|
||||
tickets: [879391]
|
||||
|
||||
- title: "RB Output: Fix calibre generated rb files not being opened by the RocketBook."
|
||||
tickets: [880930]
|
||||
|
||||
- title: "FB2 Input: Dont choke on FB2 files that have empty embedded content tags."
|
||||
tickets: [880904]
|
||||
|
||||
- title: "ODT Input: CSS rationalization should not fail with non ascii class names"
|
||||
|
||||
- title: "Fix creating new library using the copy structure option incorrectly setting all text type columns to be like the tags column"
|
||||
|
||||
- title: "E-book viewer: Don't choke on windows installs with a non UTF-8 filesystem encoding."
|
||||
tickets: [879740]
|
||||
|
||||
|
||||
improved recipes:
|
||||
- Novaya Gazeta
|
||||
- El Universal (Venezuela)
|
||||
- The Australian (subscription enabled)
|
||||
- Metro NL
|
||||
- The Scotsman
|
||||
- Japan Times
|
||||
|
||||
new recipes:
|
||||
- title: Silicon Republic
|
||||
author: Neil Grogan
|
||||
|
||||
- title: Calibre Blog
|
||||
author: Krittika Goyal
|
||||
|
||||
- version: 0.8.23
|
||||
date: 2011-10-21
|
||||
|
||||
new features:
|
||||
- title: "Drivers for T-Mobile Move, new Pandigital Novel, New Onyx Boox and Freescale MX 515"
|
||||
|
||||
- title: "SONY T1 driver: Support for periodicals and better timezone detection"
|
||||
|
||||
- title: "Add a remove cover entry to the right click menu of the cover display in the right panel"
|
||||
tickets: [874689]
|
||||
|
||||
bug fixes:
|
||||
- title: "Amazon metadata download: Fix for change in Amazon website that broke downloading metadata."
|
||||
tickets: [878395]
|
||||
|
||||
- title: "MOBI metadata: When reading titles from MOBI files only use the title in the PDB header if there is no long title in the EXTH header"
|
||||
tickets: [ 875243 ]
|
||||
|
||||
- title: "Fix regression that broke use of complex custom columns in save to disk templates."
|
||||
tickets: [877366]
|
||||
|
||||
- title: "Fix regression that broke reading metadata from CHM files"
|
||||
|
||||
- title: "Fix a bug that broke conversion of some zipped up HTML files with non ascii filenames on certain windows installs."
|
||||
tickets: [873288]
|
||||
|
||||
- title: "RTF Input: Fix bug in handling of paragraph separators."
|
||||
tickets: [863735]
|
||||
|
||||
- title: "Fix a regression that broke downloading certain periodicals for the Kindle."
|
||||
tickets: [875595]
|
||||
|
||||
- title: "Fix regression that broke updating of covers inside ebook files when saving to disk"
|
||||
|
||||
- title: "Fix regression breaking editing the 'show in tag browser' checkbox in custom column setup editing"
|
||||
|
||||
- title: "Fix typo that broke stopping selected jobs in 0.8.22"
|
||||
|
||||
improved recipes:
|
||||
- Columbus Dispatch
|
||||
- Ming Pao
|
||||
- La Republica
|
||||
- Korea Times
|
||||
- USA Today
|
||||
- CNN
|
||||
- Liberation
|
||||
- El Pais
|
||||
- Helsingin Sanomat
|
||||
|
||||
new recipes:
|
||||
- title: Kyugyhang, Hankyoreh and Hankyoreh21
|
||||
author: Seongkyoun Yoo.
|
||||
|
||||
- title: English Katherimini
|
||||
author: Thomas Scholl
|
||||
|
||||
- title: Various French news sources
|
||||
author: Aurelien Chabot.
|
||||
|
||||
- version: 0.8.22
|
||||
date: 2011-10-14
|
||||
|
||||
new features:
|
||||
- title: "Input plugin for OCR-ed DJVU files (i.e. .djvu files that contain text. Only the text is converted)"
|
||||
type: major
|
||||
|
||||
- title: "Driver for the SONY PRS T1"
|
||||
|
||||
- title: "Add a 'Back' button to the metadata download dialog while downloading covers, so that you can go back and select a different match if you dont lke the covers, instead of having to re-do the entire download."
|
||||
tickets: [855055]
|
||||
|
||||
- title: "Add an option in Preferences->Saving to disk to not show files in file browser after saving to disk"
|
||||
|
||||
- title: "Get Books: Add the amazon.fr store. Remove leading 'by' from author names. Fix encoding issues with non English titles/names"
|
||||
|
||||
- title: "Driver for Onyx BOOX A61S/X61S"
|
||||
tickets: [872741]
|
||||
|
||||
- title: "Kobo: Add support for uploading new covers to the device without converting the ePub. You can just resend the book to have the cover updated"
|
||||
|
||||
- title: "Make it a little harder to ignore the fact that there are multiple toolbars when customizing toolbars"
|
||||
tickets: [864589]
|
||||
|
||||
bug fixes:
|
||||
- title: "MOBI Input: Remove invalid tags of the form <xyz: >"
|
||||
tickets: [872883]
|
||||
|
||||
- title: "calibredb add_format does not refresh running calibre instance"
|
||||
tickets: [872961]
|
||||
|
||||
- title: "Conversion pipeline: Translate <font face> to CSS font-family"
|
||||
tickets: [871388]
|
||||
|
||||
- title: "When sending email add a Date: header so that amavis does not consider the emails to be spam"
|
||||
|
||||
- title: "Fix for the problem where setting the restriction to an empty current search clears the restriction box but does not clear the restriction."
|
||||
tickets: [871921]
|
||||
|
||||
- title: "Fix generation of column coloring rules for date/time columns"
|
||||
|
||||
- title: "Fix plugboard problem where customizations to formats accepted by a device were ignored."
|
||||
|
||||
- title: "Enable adding of various actions to the toolbar when device is connected (they had been erroneously marked as being non-addable)"
|
||||
|
||||
- title: "Fixable content in library check is not hidden after repair"
|
||||
tickets: [864096]
|
||||
|
||||
- title: "Catalog generation: Handle a corrupted thumbnail cache."
|
||||
|
||||
- title: "Do not error out when user clicks stop selected job with no job selected."
|
||||
tickets: [863766]
|
||||
|
||||
improved recipes:
|
||||
- automatiseringgids
|
||||
- CNET
|
||||
- Geek and Poke
|
||||
- Gosc Niedzielny
|
||||
- Dilbert
|
||||
- Economist
|
||||
- Ming Pao
|
||||
- Metro UK
|
||||
- Heise Online
|
||||
- FAZ.net
|
||||
- Houston Chronicle
|
||||
- Slate
|
||||
- Descopera
|
||||
|
||||
new recipes:
|
||||
- title: WoW Insider
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: Merco Press and Penguin news
|
||||
author: Russell Phillips
|
||||
|
||||
- title: Defense News
|
||||
author: Darko Miletic
|
||||
|
||||
- title: Revista Piaui
|
||||
author: Eduardo Simoes
|
||||
|
||||
- title: Dark Horizons
|
||||
author: Jaded
|
||||
|
||||
- title: Various polish news sources
|
||||
author: fenuks
|
||||
|
||||
|
||||
- version: 0.8.21
|
||||
date: 2011-09-30
|
||||
|
||||
|
70
recipes/20minutes.recipe
Normal file
70
recipes/20minutes.recipe
Normal file
@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
'''
|
||||
20minutes.fr
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Minutes(BasicNewsRecipe):
|
||||
|
||||
title = '20 minutes'
|
||||
__author__ = 'calibre'
|
||||
description = 'Actualités'
|
||||
encoding = 'cp1252'
|
||||
publisher = '20minutes.fr'
|
||||
category = 'Actualités, France, Monde'
|
||||
language = 'fr'
|
||||
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
extra_css = '''
|
||||
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
.mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':['mn-section-heading']}),
|
||||
dict(name='a', attrs={'href':['#commentaires']}),
|
||||
dict(name='div', attrs={'class':['mn-right']}),
|
||||
dict(name='div', attrs={'class':['mna-box']}),
|
||||
dict(name='div', attrs={'class':['mna-comment-call']}),
|
||||
dict(name='div', attrs={'class':['mna-tools']}),
|
||||
dict(name='div', attrs={'class':['mn-trilist']})
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(id='mn-article')]
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class':['mna-body','mna-signature']})
|
||||
|
||||
|
||||
feeds = [
|
||||
('France', 'http://www.20minutes.fr/rss/actu-france.xml'),
|
||||
('International', 'http://www.20minutes.fr/rss/monde.xml'),
|
||||
('Tech/Web', 'http://www.20minutes.fr/rss/hightech.xml'),
|
||||
('Sciences', 'http://www.20minutes.fr/rss/sciences.xml'),
|
||||
('Economie', 'http://www.20minutes.fr/rss/economie.xml'),
|
||||
('Politique', 'http://www.20minutes.fr/rss/politique.xml'),
|
||||
(u'Médias', 'http://www.20minutes.fr/rss/media.xml'),
|
||||
('Cinema', 'http://www.20minutes.fr/rss/cinema.xml'),
|
||||
('People', 'http://www.20minutes.fr/rss/people.xml'),
|
||||
('Culture', 'http://www.20minutes.fr/rss/culture.xml'),
|
||||
('Sport', 'http://www.20minutes.fr/rss/sport.xml'),
|
||||
('Paris', 'http://www.20minutes.fr/rss/paris.xml'),
|
||||
('Lyon', 'http://www.20minutes.fr/rss/lyon.xml'),
|
||||
('Toulouse', 'http://www.20minutes.fr/rss/toulouse.xml')
|
||||
]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
@ -10,27 +10,15 @@ class autogids(BasicNewsRecipe):
|
||||
publisher = 'AutomatiseringGids'
|
||||
category = 'Nieuws, IT, Nederlandstalig'
|
||||
simultaneous_downloads = 5
|
||||
#delay = 1
|
||||
timefmt = ' [%A, %d %B, %Y]'
|
||||
#timefmt = ''
|
||||
timefmt = ' [%a, %d %B, %Y]'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.automatiseringgids.nl/siteimg/header_logo.gif'
|
||||
keep_only_tags = [dict(id=['content'])]
|
||||
extra_css = '.artikelheader {font-size:0.8em; color: #666;} .artikelintro {font-weight:bold} div.imgArticle {float: right; margin: 0 0em 1em 1em; display: block; position: relative; } \
|
||||
h2 { margin: 0 0 0.5em; min-height: 30px; font-size: 1.5em; letter-spacing: -0.2px; margin: 0 0 0.5em; color: black; font-weight: bold; line-height: 1.2em; padding: 4px 3px 0; }'
|
||||
cover_url = 'http://www.automatiseringgids.nl/binaries/content/gallery/ag/marketing/ag-avatar-100x50.jpg'
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['content']})]
|
||||
|
||||
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id':['loginbox','reactiecollapsible','reactiebox']}),
|
||||
dict(name='div', attrs={'class':['column_a','column_c','bannerfullsize','reactieheader','reactiecollapsible','formulier','artikel_headeroptions']}),
|
||||
dict(name='ul', attrs={'class':['highlightlist']}),
|
||||
dict(name='input', attrs={'type':['button']}),
|
||||
dict(name='div', attrs={'style':['display:block; width:428px; height:30px; float:left;']}),
|
||||
]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
|
@ -110,8 +110,10 @@ class BrandEins(BasicNewsRecipe):
|
||||
selected_issue = issue_map[selected_issue_key]
|
||||
url = selected_issue.get('href', False)
|
||||
# Get the title for the magazin - build it out of the title of the cover - take the issue and year;
|
||||
self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
|
||||
# self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
|
||||
# Get the alternative title for the magazin - build it out of the title of the cover - without the issue and year;
|
||||
url = 'http://brandeins.de/'+url
|
||||
self.timefmt = ' ' + selected_issue_key[4:] + '/' + selected_issue_key[:4]
|
||||
|
||||
# url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
|
||||
titles_and_articles = self.brand_eins_parse_issue(url)
|
||||
@ -163,4 +165,3 @@ class BrandEins(BasicNewsRecipe):
|
||||
current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
|
||||
titles_and_articles.append([chapter_title, current_articles])
|
||||
return titles_and_articles
|
||||
|
||||
|
18
recipes/calibre_blog.recipe
Normal file
18
recipes/calibre_blog.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CalibreBlog(BasicNewsRecipe):
|
||||
title = u'Calibre Blog'
|
||||
language = 'en'
|
||||
__author__ = 'Krittika Goyal'
|
||||
oldest_article = 1000 #days
|
||||
max_articles_per_feed = 5
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
|
||||
feeds = [
|
||||
('Article',
|
||||
'http://blog.calibre-ebook.com/feeds/posts/default'),
|
||||
]
|
@ -5,8 +5,8 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
Changelog:
|
||||
2011-09-24
|
||||
Changed cover (drMerry)
|
||||
'''
|
||||
'''
|
||||
2011-10-13
|
||||
Updated Cover (drMerry)
|
||||
news.cnet.com
|
||||
'''
|
||||
|
||||
@ -24,7 +24,7 @@ class CnetNews(BasicNewsRecipe):
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
|
||||
cover_url = 'http://reviews.cnet.com/i/ff/wp/logo_cnet.gif'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
|
@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe):
|
||||
#match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
|
||||
max_articles_per_feed = 25
|
||||
|
||||
extra_css = '''
|
||||
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
|
||||
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
|
||||
@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe):
|
||||
remove_tags = [
|
||||
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
||||
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
||||
'cnn_strycntntrgt', 'hed_side', 'foot']},
|
||||
'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']},
|
||||
{'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none',
|
||||
'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']},
|
||||
{'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev',
|
||||
'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']},
|
||||
{'style':['display:none']},
|
||||
dict(id=['ie_column']),
|
||||
]
|
||||
|
||||
@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe):
|
||||
ans = BasicNewsRecipe.get_article_url(self, article)
|
||||
return ans.partition('?')[0]
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
masthead = None
|
||||
return masthead
|
||||
|
@ -14,67 +14,43 @@ class ColumbusDispatchRecipe(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
oldest_article = 1.2
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
encoding = 'utf-8'
|
||||
# Seems to work best, but YMMV
|
||||
simultaneous_downloads = 2
|
||||
|
||||
auto_cleanup = True
|
||||
#auto_cleanup_keep = '//div[@id="story-photos"]'
|
||||
# Feeds from http://www.dispatch.com/live/content/rss/index.html
|
||||
feeds = []
|
||||
feeds.append((u'News: Local and state news', u'http://www.dispatch.com/live/static/crt/2_rss_localnews.xml'))
|
||||
feeds.append((u'News: National news', u'http://www.dispatch.com/live/static/crt/2_rss_nationalnews.xml'))
|
||||
feeds.append((u'News: Editorials', u'http://www.dispatch.com/live/static/crt/2_rss_editorials.xml'))
|
||||
feeds.append((u'News: Columnists', u'http://www.dispatch.com/live/static/crt/2_rss_columnists.xml'))
|
||||
feeds.append((u'News: Health news', u'http://www.dispatch.com/live/static/crt/2_rss_health.xml'))
|
||||
feeds.append((u'News: Science news', u'http://www.dispatch.com/live/static/crt/2_rss_science.xml'))
|
||||
feeds.append((u'Sports: OSU football', u'http://www.dispatch.com/live/static/crt/2_rss_osufootball.xml'))
|
||||
feeds.append((u'Sports: OSU men\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osumensbball.xml'))
|
||||
feeds.append((u'Sports: OSU women\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osuwomensbball.xml'))
|
||||
feeds.append((u'Sports: OSU sports', u'http://www.dispatch.com/live/static/crt/2_rss_osusports.xml'))
|
||||
feeds.append((u'Sports: Blue Jackets', u'http://www.dispatch.com/live/static/crt/2_rss_bluejackets.xml'))
|
||||
feeds.append((u'Sports: Crew', u'http://www.dispatch.com/live/static/crt/2_rss_crew.xml'))
|
||||
feeds.append((u'Sports: Clippers', u'http://www.dispatch.com/live/static/crt/2_rss_clippers.xml'))
|
||||
feeds.append((u'Sports: Indians', u'http://www.dispatch.com/live/static/crt/2_rss_indians.xml'))
|
||||
feeds.append((u'Sports: Reds', u'http://www.dispatch.com/live/static/crt/2_rss_reds.xml'))
|
||||
feeds.append((u'Sports: Golf', u'http://www.dispatch.com/live/static/crt/2_rss_golf.xml'))
|
||||
feeds.append((u'Sports: Outdoors', u'http://www.dispatch.com/live/static/crt/2_rss_outdoors.xml'))
|
||||
feeds.append((u'Sports: Cavs/NBA', u'http://www.dispatch.com/live/static/crt/2_rss_cavaliers.xml'))
|
||||
feeds.append((u'Sports: High Schools', u'http://www.dispatch.com/live/static/crt/2_rss_highschools.xml'))
|
||||
feeds.append((u'Sports: Browns', u'http://www.dispatch.com/live/static/crt/2_rss_browns.xml'))
|
||||
feeds.append((u'Sports: Bengals', u'http://www.dispatch.com/live/static/crt/2_rss_bengals.xml'))
|
||||
feeds.append((u'Sports: Auto Racing', u'http://www.dispatch.com/live/static/crt/2_rss_autoracing.xml'))
|
||||
feeds.append((u'Business News', u'http://www.dispatch.com/live/static/crt/2_rss_business.xml'))
|
||||
feeds.append((u'Features: Weekender', u'http://www.dispatch.com/live/static/crt/2_rss_weekender.xml'))
|
||||
feeds.append((u'Features: Life and Arts', u'http://www.dispatch.com/live/static/crt/2_rss_lifearts.xml'))
|
||||
feeds.append((u'Features: Food', u'http://www.dispatch.com/live/static/crt/2_rss_food.xml'))
|
||||
feeds.append((u'Features: NOW! for kids', u'http://www.dispatch.com/live/static/crt/2_rss_now.xml'))
|
||||
feeds.append((u'Features: Travel', u'http://www.dispatch.com/live/static/crt/2_rss_travel.xml'))
|
||||
feeds.append((u'Features: Home and Garden', u'http://www.dispatch.com/live/static/crt/2_rss_homegarden.xml'))
|
||||
feeds.append((u'Features: Faith and Values', u'http://www.dispatch.com/live/static/crt/2_rss_faithvalues.xml'))
|
||||
#feeds.append((u'', u''))
|
||||
feeds = [
|
||||
('Local',
|
||||
'http://www.dispatch.com/content/syndication/news_local-state.xml'),
|
||||
('National',
|
||||
'http://www.dispatch.com/content/syndication/news_national.xml'),
|
||||
('Business',
|
||||
'http://www.dispatch.com/content/syndication/news_business.xml'),
|
||||
('Editorials',
|
||||
'http://www.dispatch.com/content/syndication/opinion_editorials.xml'),
|
||||
('Columnists',
|
||||
'http://www.dispatch.com/content/syndication/opinion_columns.xml'),
|
||||
('Life and Arts',
|
||||
'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'),
|
||||
('OSU Sports',
|
||||
'http://www.dispatch.com/content/syndication/sports_osu.xml'),
|
||||
('Auto Racing',
|
||||
'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'),
|
||||
('Outdoors',
|
||||
'http://www.dispatch.com/content/syndication/sports_outdoors.xml'),
|
||||
('Bengals',
|
||||
'http://www.dispatch.com/content/syndication/sports_bengals.xml'),
|
||||
('Indians',
|
||||
'http://www.dispatch.com/content/syndication/sports_indians.xml'),
|
||||
('Clippers',
|
||||
'http://www.dispatch.com/content/syndication/sports_clippers.xml'),
|
||||
('Crew',
|
||||
'http://www.dispatch.com/content/syndication/sports_crew.xml'),
|
||||
('Reds',
|
||||
'http://www.dispatch.com/content/syndication/sports_reds.xml'),
|
||||
('Blue Jackets',
|
||||
'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'),
|
||||
]
|
||||
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'colhed'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'hed'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subhed'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'date'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'byline'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'srcline'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'body'}))
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name = 'div', attrs = {'id': 'middle-story-ad-container'}))
|
||||
|
||||
extra_css = '''
|
||||
body {font-family:verdana,arial,helvetica,geneva,sans-serif ;}
|
||||
a {text-decoration: none; color: blue;}
|
||||
div.colhed {font-weight: bold;}
|
||||
div.hed {font-size: xx-large; font-weight: bold; margin-bottom: 0.2em;}
|
||||
div.subhed {font-size: large;}
|
||||
div.date {font-size: x-small; font-style: italic; color: #666666; margin-top: 0.4em; margin-bottom: 0.4em;}
|
||||
div.byline, div.srcline {font-size: small; color: #696969;}
|
||||
'''
|
||||
|
||||
|
@ -8,11 +8,7 @@ class DallasNews(BasicNewsRecipe):
|
||||
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_tags_before = dict(name='h1')
|
||||
keep_only_tags = {'class':lambda x: x and 'article' in x}
|
||||
remove_tags = [
|
||||
{'class':['DMNSocialTools', 'article ', 'article first ', 'article premium']},
|
||||
]
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [
|
||||
('Local News',
|
||||
|
62
recipes/defensenews.recipe
Normal file
62
recipes/defensenews.recipe
Normal file
@ -0,0 +1,62 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.defensenews.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DefenseNews(BasicNewsRecipe):
|
||||
title = 'Defense News'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Find late-breaking defense news from the leading defense news weekly'
|
||||
publisher = 'Gannett Government Media Corporation'
|
||||
category = 'defense news, defence news, defense, defence, defence budget, defence policy'
|
||||
oldest_article = 31
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.defensenews.com/images/logo_defensenews2.jpg'
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
.info{font-size: small; color: gray}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['meta','link'])
|
||||
,dict(attrs={'class':['toolbar','related','left','right']})
|
||||
]
|
||||
remove_tags_before = attrs={'class':'storyWrp'}
|
||||
remove_tags_after = attrs={'class':'middle'}
|
||||
|
||||
remove_attributes=['lang']
|
||||
|
||||
feeds = [
|
||||
(u'Europe' , u'http://www.defensenews.com/rss/eur/' )
|
||||
,(u'Americas', u'http://www.defensenews.com/rss/ame/' )
|
||||
,(u'Asia & Pacific rim', u'http://www.defensenews.com/rss/asi/' )
|
||||
,(u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/')
|
||||
,(u'Air', u'http://www.defensenews.com/rss/air/' )
|
||||
,(u'Land', u'http://www.defensenews.com/rss/lan/' )
|
||||
,(u'Naval', u'http://www.defensenews.com/rss/sea/' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
@ -2,6 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
http://www.dilbert.com
|
||||
DrMerry added cover Image 2011-11-12
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
@ -9,7 +10,7 @@ import re
|
||||
|
||||
class DilbertBig(BasicNewsRecipe):
|
||||
title = 'Dilbert'
|
||||
__author__ = 'Darko Miletic and Starson17'
|
||||
__author__ = 'Darko Miletic and Starson17 contribution of DrMerry'
|
||||
description = 'Dilbert'
|
||||
reverse_article_order = True
|
||||
oldest_article = 15
|
||||
@ -20,6 +21,7 @@ class DilbertBig(BasicNewsRecipe):
|
||||
publisher = 'UNITED FEATURE SYNDICATE, INC.'
|
||||
category = 'comic'
|
||||
language = 'en'
|
||||
cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
|
@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
|
||||
' perspective. Best downloaded on Friday mornings (GMT)')
|
||||
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
|
||||
oldest_article = 7.0
|
||||
cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
|
||||
#cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
|
||||
@ -56,6 +54,14 @@ class Economist(BasicNewsRecipe):
|
||||
return br
|
||||
'''
|
||||
|
||||
def get_cover_url(self):
|
||||
br = self.browser
|
||||
br.open(self.INDEX)
|
||||
issue = br.geturl().split('/')[4]
|
||||
self.log('Fetching cover for issue: %s'%issue)
|
||||
cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
|
||||
return cover_url
|
||||
|
||||
def parse_index(self):
|
||||
return self.economist_parse_index()
|
||||
|
||||
|
@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
|
||||
' perspective. Best downloaded on Friday mornings (GMT)')
|
||||
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
|
||||
oldest_article = 7.0
|
||||
cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
|
||||
#cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
|
||||
@ -40,6 +38,14 @@ class Economist(BasicNewsRecipe):
|
||||
# downloaded with connection reset by peer (104) errors.
|
||||
delay = 1
|
||||
|
||||
def get_cover_url(self):
|
||||
br = self.browser
|
||||
br.open(self.INDEX)
|
||||
issue = br.geturl().split('/')[4]
|
||||
self.log('Fetching cover for issue: %s'%issue)
|
||||
cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
|
||||
return cover_url
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
try:
|
||||
|
58
recipes/ekathemerini.recipe
Normal file
58
recipes/ekathemerini.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
|
||||
|
||||
class Ekathimerini(BasicNewsRecipe):
|
||||
title = 'ekathimerini'
|
||||
__author__ = 'Thomas Scholl'
|
||||
description = 'News from Greece, English edition'
|
||||
masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
|
||||
max_articles_per_feed = 100
|
||||
oldest_article = 100
|
||||
publisher = 'Kathimerini'
|
||||
category = 'news, GR'
|
||||
language = 'en_GR'
|
||||
encoding = 'windows-1253'
|
||||
conversion_options = { 'linearize_tables': True}
|
||||
no_stylesheets = True
|
||||
delay = 1
|
||||
keep_only_tags = [dict(name='td', attrs={'class':'news'})]
|
||||
|
||||
rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'
|
||||
|
||||
def find_articles(self, idx, category):
|
||||
for article in idx.findAll('item'):
|
||||
cat = u''
|
||||
cat_elem = article.find('subcat')
|
||||
if cat_elem:
|
||||
cat = self.tag_to_string(cat_elem)
|
||||
|
||||
if cat == category:
|
||||
desc_html = self.tag_to_string(article.find('description'))
|
||||
description = self.tag_to_string(BeautifulSoup(desc_html))
|
||||
|
||||
a = {
|
||||
'title': self.tag_to_string(article.find('title')),
|
||||
'url': self.tag_to_string(article.find('link')),
|
||||
'description': description,
|
||||
'date' : self.tag_to_string(article.find('pubdate')),
|
||||
}
|
||||
yield a
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
idx_contents = self.browser.open(self.rss_url).read()
|
||||
idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
|
||||
|
||||
cats = list(set([self.tag_to_string(subcat) for subcat in idx.findAll('subcat')]))
|
||||
cats.sort()
|
||||
|
||||
feeds = [(u'News',list(self.find_articles(idx, u'')))]
|
||||
|
||||
for cat in cats:
|
||||
feeds.append((cat.capitalize(), list(self.find_articles(idx, cat))))
|
||||
|
||||
return feeds
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')
|
||||
|
@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})]
|
||||
|
||||
extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '
|
||||
|
||||
|
@ -56,6 +56,7 @@ class ElUniversal(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
rp,sep,rest = url.rpartition('/')
|
||||
return rp + sep + 'imp_' + rest
|
||||
return url + '-imp'
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
|
10
recipes/frandroid.recipe
Normal file
10
recipes/frandroid.recipe
Normal file
@ -0,0 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class BasicUserRecipe1318572550(BasicNewsRecipe):
|
||||
title = u'FrAndroid'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'FrAndroid', u'http://feeds.feedburner.com/Frandroid')]
|
@ -16,7 +16,7 @@ class FTDe(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
language = 'de'
|
||||
max_articles_per_feed = 40
|
||||
max_articles_per_feed = 30
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [dict(id='navi_top'),
|
||||
@ -84,19 +84,19 @@ class FTDe(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'artikelsplitfaq'})]
|
||||
#remove_tags_after = [dict(name='a', attrs={'class':'more'})]
|
||||
|
||||
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
|
||||
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
|
||||
('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
|
||||
('Politik', 'http://www.ftd.de/rss2/politik'),
|
||||
('Karriere_Management', 'http://www.ftd.de/rss2/karriere-management'),
|
||||
('IT_Medien', 'http://www.ftd.de/rss2/it-medien'),
|
||||
('Wissen', 'http://www.ftd.de/rss2/wissen'),
|
||||
('Sport', 'http://www.ftd.de/rss2/sport'),
|
||||
('Auto', 'http://www.ftd.de/rss2/auto'),
|
||||
('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
|
||||
|
||||
]
|
||||
feeds = [
|
||||
('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
|
||||
('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
|
||||
('Meinungen', 'http://www.ftd.de/rss2/meinungshungrige'),
|
||||
('Politik', 'http://www.ftd.de/rss2/politik'),
|
||||
('Management & Karriere', 'http://www.ftd.de/rss2/karriere-management'),
|
||||
('IT & Medien', 'http://www.ftd.de/rss2/it-medien'),
|
||||
('Wissen', 'http://www.ftd.de/rss2/wissen'),
|
||||
('Sport', 'http://www.ftd.de/rss2/sport'),
|
||||
('Auto', 'http://www.ftd.de/rss2/auto'),
|
||||
('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
|
||||
]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('.html', '.html?mode=print')
|
||||
return url.replace('.html', '.html?mode=print')
|
@ -1,35 +1,82 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.utils.magick import Image, create_canvas
|
||||
|
||||
class AdvancedUserRecipe1307556816(BasicNewsRecipe):
|
||||
title = u'Geek and Poke'
|
||||
__author__ = u'DrMerry'
|
||||
description = u'Geek and Poke Cartoons'
|
||||
publisher = u'Oliver Widder'
|
||||
author = u'Oliver Widder, DrMerry (calibre-code), calibre'
|
||||
oldest_article = 31
|
||||
max_articles_per_feed = 100
|
||||
language = u'en'
|
||||
simultaneous_downloads = 5
|
||||
simultaneous_downloads = 1
|
||||
#delay = 1
|
||||
timefmt = ' [%A, %d %B, %Y]'
|
||||
timefmt = ' [%a, %d %B, %Y]'
|
||||
summary_length = -1
|
||||
no_stylesheets = True
|
||||
category = 'News.IT, Cartoon, Humor, Geek'
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'blog'
|
||||
masthead_url = None
|
||||
conversion_options = {
|
||||
'comments' : ''
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'author' : author
|
||||
}
|
||||
|
||||
preprocess_regexps = [ (re.compile(r'(<p> </p>|<iframe.*</iframe>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
|
||||
(re.compile(r'( | )', re.DOTALL|re.IGNORECASE),lambda match: ' '),
|
||||
(re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>')
|
||||
]
|
||||
remove_tags_before = dict(name='p', attrs={'class':'content-nav'})
|
||||
remove_tags_after = dict(name='div', attrs={'class':'entry-content'})
|
||||
remove_tags = [dict(name='div', attrs={'class':'entry-footer'}),
|
||||
dict(name='div', attrs={'id':'alpha'}),
|
||||
dict(name='div', attrs={'id':'gamma'}),
|
||||
dict(name='iframe'),
|
||||
dict(name='p', attrs={'class':'content-nav'})]
|
||||
|
||||
extra_css = 'body, h3, p, h2, h1, div, span{margin:0px} h2.date-header {font-size: 0.7em; color:#eee;} h3.entry-header{font-size: 1.0em} div.entry-body{font-size: 0.9em}'
|
||||
filter_regexps = [(r'feedburner\.com'),
|
||||
(r'pixel.quantserve\.com'),
|
||||
(r'googlesyndication\.com'),
|
||||
(r'yimg\.com'),
|
||||
(r'scorecardresearch\.com')]
|
||||
|
||||
preprocess_regexps = [(re.compile(r'(<p>( |\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->|<h2[^>]*>[^<]*</h2>[^<]*)', re.DOTALL|re.IGNORECASE),lambda match: ''),
|
||||
(re.compile(r'( |\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
|
||||
(re.compile(r'(<h3[^>]*>)<a[^>]>((?!</a)*)</a></h3>', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + match.group(2) + '</h3>'),
|
||||
(re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL|re.IGNORECASE),lambda match: '<div id="merryImage"><cite>' + match.group(2) + '</cite><br>' + match.group(1) + '</div>'),
|
||||
(re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>'),
|
||||
]
|
||||
|
||||
remove_tags_before = dict(name='h2', attrs={'class':'date-header'})
|
||||
remove_tags_after = dict(name='div', attrs={'class':'entry-body'})
|
||||
extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||
iurl = tag['src']
|
||||
img = Image()
|
||||
img.open(iurl)
|
||||
#width, height = img.size
|
||||
#print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
|
||||
img.trim(0)
|
||||
#width, height = img.size
|
||||
#print '***TRIMMED img width is: ', width, 'height is: ', height
|
||||
left=0
|
||||
top=0
|
||||
border_color='#ffffff'
|
||||
width, height = img.size
|
||||
#print '***retrieved img width is: ', width, 'height is: ', height
|
||||
height_correction = 1.17
|
||||
canvas = create_canvas(width, height*height_correction,border_color)
|
||||
canvas.compose(img, left, top)
|
||||
#img = canvas
|
||||
#img.save(iurl)
|
||||
canvas.save(iurl)
|
||||
#width, height = canvas.size
|
||||
#print '***NEW img width is: ', width, 'height is: ', height
|
||||
return soup
|
||||
|
||||
feeds = [(u'Geek and Poke', u'http://feeds.feedburner.com/GeekAndPoke?format=xml')]
|
||||
feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']
|
||||
|
11
recipes/googlemobileblog.recipe
Normal file
11
recipes/googlemobileblog.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1318572445(BasicNewsRecipe):
|
||||
title = u'Google Mobile Blog'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml')]
|
@ -19,6 +19,7 @@ class GN(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
remove_javascript = True
|
||||
temp_files = []
|
||||
simultaneous_downloads = 1
|
||||
|
||||
articles_are_obfuscated = True
|
||||
|
||||
@ -94,16 +95,16 @@ class GN(BasicNewsRecipe):
|
||||
|
||||
def find_articles(self, main_block):
|
||||
for a in main_block.findAll('div', attrs={'class':'prev_doc2'}):
|
||||
art = a.find('a')
|
||||
yield {
|
||||
art = a.find('a')
|
||||
yield {
|
||||
'title' : self.tag_to_string(art),
|
||||
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
|
||||
'date' : '',
|
||||
'description' : ''
|
||||
}
|
||||
for a in main_block.findAll('div', attrs={'class':'sr-document'}):
|
||||
art = a.find('a')
|
||||
yield {
|
||||
art = a.find('a')
|
||||
yield {
|
||||
'title' : self.tag_to_string(art),
|
||||
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
|
||||
'date' : '',
|
||||
|
@ -119,10 +119,8 @@ class Guardian(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
def parse_index(self):
|
||||
try:
|
||||
feeds = []
|
||||
for title, href in self.find_sections():
|
||||
feeds.append((title, list(self.find_articles(href))))
|
||||
return feeds
|
||||
except:
|
||||
raise NotImplementedError
|
||||
feeds = []
|
||||
for title, href in self.find_sections():
|
||||
feeds.append((title, list(self.find_articles(href))))
|
||||
return feeds
|
||||
|
||||
|
47
recipes/hankyoreh.recipe
Normal file
47
recipes/hankyoreh.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download The Hankyoreh
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Hankyoreh(BasicNewsRecipe):
|
||||
title = u'Hankyoreh'
|
||||
language = 'ko'
|
||||
description = u'The Hankyoreh News articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 5
|
||||
recursions = 1
|
||||
max_articles_per_feed = 5
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [
|
||||
dict(name='tr', attrs={'height':['60px']}),
|
||||
dict(id=['fontSzArea'])
|
||||
]
|
||||
remove_tags = [
|
||||
dict(target='_blank'),
|
||||
dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
|
||||
dict(name='iframe', attrs={'width':['590']}),
|
||||
]
|
||||
remove_tags_after = [
|
||||
dict(target='_top')
|
||||
]
|
||||
feeds = [
|
||||
('All News','http://www.hani.co.kr/rss/'),
|
||||
('Politics','http://www.hani.co.kr/rss/politics/'),
|
||||
('Economy','http://www.hani.co.kr/rss/economy/'),
|
||||
('Society','http://www.hani.co.kr/rss/society/'),
|
||||
('International','http://www.hani.co.kr/rss/international/'),
|
||||
('Culture','http://www.hani.co.kr/rss/culture/'),
|
||||
('Sports','http://www.hani.co.kr/rss/sports/'),
|
||||
('Science','http://www.hani.co.kr/rss/science/'),
|
||||
('Opinion','http://www.hani.co.kr/rss/opinion/'),
|
||||
('Cartoon','http://www.hani.co.kr/rss/cartoon/'),
|
||||
('English Edition','http://www.hani.co.kr/rss/english_edition/'),
|
||||
('Specialsection','http://www.hani.co.kr/rss/specialsection/'),
|
||||
('Hanionly','http://www.hani.co.kr/rss/hanionly/'),
|
||||
('Hkronly','http://www.hani.co.kr/rss/hkronly/'),
|
||||
('Multihani','http://www.hani.co.kr/rss/multihani/'),
|
||||
('Lead','http://www.hani.co.kr/rss/lead/'),
|
||||
('Newsrank','http://www.hani.co.kr/rss/newsrank/'),
|
||||
]
|
25
recipes/hankyoreh21.recipe
Normal file
25
recipes/hankyoreh21.recipe
Normal file
@ -0,0 +1,25 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download The Hankyoreh
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Hankyoreh21(BasicNewsRecipe):
|
||||
title = u'Hankyoreh21'
|
||||
language = 'ko'
|
||||
description = u'The Hankyoreh21 Magazine articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 20
|
||||
recursions = 1
|
||||
max_articles_per_feed = 120
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
keep_only_tags = [
|
||||
dict(name='font', attrs={'class':'t18bk'}),
|
||||
dict(id=['fontSzArea'])
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('Hani21','http://h21.hani.co.kr/rss/ '),
|
||||
]
|
@ -1,7 +1,9 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
|
||||
title = 'Heise-online'
|
||||
title = 'heise online'
|
||||
description = 'News vom Heise-Verlag'
|
||||
__author__ = 'schuster'
|
||||
use_embedded_content = False
|
||||
@ -12,10 +14,11 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
timeout = 5
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
|
||||
|
||||
remove_tags_after = dict(name ='p', attrs={'class':'editor'})
|
||||
remove_tags = [dict(id='navi_top_container'),
|
||||
remove_tags = [{'class':'navi_top_container'},
|
||||
dict(id='navi_bottom'),
|
||||
dict(id='mitte_rechts'),
|
||||
dict(id='navigation'),
|
||||
@ -25,28 +28,28 @@ class AdvancedUserRecipe(BasicNewsRecipe):
|
||||
dict(id='content_foren'),
|
||||
dict(id='seiten_navi'),
|
||||
dict(id='adbottom'),
|
||||
dict(id='sitemap')]
|
||||
dict(id='sitemap'),
|
||||
dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
|
||||
('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
|
||||
('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
|
||||
('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
|
||||
('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
|
||||
('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
|
||||
('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
|
||||
('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
|
||||
('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
|
||||
('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
|
||||
('iX', 'http://www.heise.de/ix/news/news.rdf'),
|
||||
('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
|
||||
('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
|
||||
('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
|
||||
('Security', 'http://www.heise.de/security/news/news-atom.xml'),
|
||||
('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
|
||||
('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
|
||||
('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
|
||||
('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
|
||||
('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
|
||||
('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
|
||||
('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
|
||||
('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
|
||||
('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
|
||||
('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
|
||||
('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
|
||||
('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
|
||||
]
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?view=print'
|
||||
|
||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1298137661(BasicNewsRecipe):
|
||||
title = u'Helsingin Sanomat'
|
||||
__author__ = 'oneillpt'
|
||||
language = 'fi'
|
||||
language = 'fi'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -11,21 +11,12 @@ class AdvancedUserRecipe1298137661(BasicNewsRecipe):
|
||||
conversion_options = {
|
||||
'linearize_tables' : True
|
||||
}
|
||||
remove_tags = [
|
||||
dict(name='a', attrs={'id':'articleCommentUrl'}),
|
||||
dict(name='p', attrs={'class':'newsSummary'}),
|
||||
dict(name='div', attrs={'class':'headerTools'})
|
||||
]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'main-content'}),
|
||||
dict(name='div', attrs={'class':'contentNewsArticle'})]
|
||||
|
||||
feeds = [(u'Uutiset - HS.fi', u'http://www.hs.fi/uutiset/rss/'), (u'Politiikka - HS.fi', u'http://www.hs.fi/politiikka/rss/'),
|
||||
feeds = [(u'Uutiset - HS.fi', u'http://www.hs.fi/uutiset/rss/')
|
||||
, (u'Politiikka - HS.fi', u'http://www.hs.fi/politiikka/rss/'),
|
||||
(u'Ulkomaat - HS.fi', u'http://www.hs.fi/ulkomaat/rss/'), (u'Kulttuuri - HS.fi', u'http://www.hs.fi/kulttuuri/rss/'),
|
||||
(u'Kirjat - HS.fi', u'http://www.hs.fi/kulttuuri/kirjat/rss/'), (u'Elokuvat - HS.fi', u'http://www.hs.fi/kulttuuri/elokuvat/rss/')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
j = url.rfind("/")
|
||||
s = url[j:]
|
||||
i = s.rfind("?ref=rss")
|
||||
if i > 0:
|
||||
s = s[:i]
|
||||
return "http://www.hs.fi/tulosta" + s
|
||||
|
@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HunMilNews(BasicNewsRecipe):
|
||||
title = u'Honvedelem.hu'
|
||||
oldest_article = 3
|
||||
description = u'Katonah\xedrek'
|
||||
language = 'hu'
|
||||
|
||||
lang = 'hu'
|
||||
encoding = 'windows-1250'
|
||||
category = 'news, military'
|
||||
|
||||
no_stylesheets = True
|
||||
|
||||
|
||||
__author__ = 'Devilinside'
|
||||
max_articles_per_feed = 16
|
||||
no_stylesheets = True
|
||||
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'cikkoldal_cikk_cim'}),
|
||||
dict(name='div', attrs={'class':'cikkoldal_cikk_alcim'}),
|
||||
dict(name='div', attrs={'class':'cikkoldal_datum'}),
|
||||
dict(name='div', attrs={'class':'cikkoldal_lead'}),
|
||||
dict(name='div', attrs={'class':'cikkoldal_szoveg'}),
|
||||
dict(name='img', attrs={'class':'ajanlo_kep_keretes'}),
|
||||
]
|
||||
|
||||
|
||||
|
||||
feeds = [(u'Misszi\xf3k', u'http://www.honvedelem.hu/rss_b?c=22'),
|
||||
(u'Aktu\xe1lis hazai h\xedrek', u'http://www.honvedelem.hu/rss_b?c=3'),
|
||||
(u'K\xfclf\xf6ldi h\xedrek', u'http://www.honvedelem.hu/rss_b?c=4'),
|
||||
(u'A h\xf3nap t\xe9m\xe1ja', u'http://www.honvedelem.hu/rss_b?c=6'),
|
||||
(u'Riport', u'http://www.honvedelem.hu/rss_b?c=5'),
|
||||
(u'Portr\xe9k', u'http://www.honvedelem.hu/rss_b?c=7'),
|
||||
(u'Haditechnika', u'http://www.honvedelem.hu/rss_b?c=8'),
|
||||
(u'Programok, esem\xe9nyek', u'http://www.honvedelem.hu/rss_b?c=12')
|
||||
]
|
||||
|
@ -1,41 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HunTechNet(BasicNewsRecipe):
|
||||
title = u'TechNet'
|
||||
oldest_article = 3
|
||||
description = u'Az ut\xf3bbi 3 nap TechNet h\xedrei'
|
||||
language = 'hu'
|
||||
|
||||
lang = 'hu'
|
||||
encoding = 'utf-8'
|
||||
__author__ = 'Devilinside'
|
||||
max_articles_per_feed = 30
|
||||
timefmt = ' [%Y, %b %d, %a]'
|
||||
|
||||
|
||||
|
||||
|
||||
remove_tags_before = dict(name='div', attrs={'id':'c-main'})
|
||||
remove_tags = [dict(name='div', attrs={'class':'wrp clr'}),
|
||||
{'class' : ['screenrdr','forum','print','startlap','text_small','text_normal','text_big','email']},
|
||||
]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'cikk_head box'}),dict(name='div', attrs={'class':'cikk_txt box'})]
|
||||
|
||||
|
||||
|
||||
feeds = [(u'C\xedmlap',
|
||||
u'http://www.technet.hu/rss/cimoldal/'), (u'TechTud',
|
||||
u'http://www.technet.hu/rss/techtud/'), (u'PDA M\xe1nia',
|
||||
u'http://www.technet.hu/rss/pdamania/'), (u'Telefon',
|
||||
u'http://www.technet.hu/rss/telefon/'), (u'Sz\xe1m\xedt\xf3g\xe9p',
|
||||
u'http://www.technet.hu/rss/notebook/'), (u'GPS',
|
||||
u'http://www.technet.hu/rss/gps/')]
|
||||
|
BIN
recipes/icons/la_republica.png
Normal file
BIN
recipes/icons/la_republica.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 868 B |
BIN
recipes/icons/metro_news_nl.png (PNG Image, 16x16 pixels).png
Normal file
BIN
recipes/icons/metro_news_nl.png (PNG Image, 16x16 pixels).png
Normal file
Binary file not shown.
After Width: | Height: | Size: 712 B |
@ -44,7 +44,11 @@ class JapanTimes(BasicNewsRecipe):
|
||||
return rurl.partition('?')[0]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('/cgi-bin/','/print/')
|
||||
if '/rss/' in url:
|
||||
return url.replace('.jp/rss/','.jp/print/')
|
||||
if '/text/' in url:
|
||||
return url.replace('.jp/text/','.jp/print/')
|
||||
return url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
|
20
recipes/korben.recipe
Normal file
20
recipes/korben.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1318619728(BasicNewsRecipe):
|
||||
title = u'Korben'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')]
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
masthead = None
|
||||
return masthead
|
@ -1,36 +1,35 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download KoreaHerald
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class KoreaHerald(BasicNewsRecipe):
|
||||
title = u'KoreaHerald'
|
||||
language = 'en'
|
||||
description = u'Korea Herald News articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 10
|
||||
recursions = 3
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [
|
||||
dict(id=['contentLeft', '_article'])
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
|
||||
dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('All News','http://www.koreaherald.com/rss/020000000000.xml'),
|
||||
('National','http://www.koreaherald.com/rss/020100000000.xml'),
|
||||
('Business','http://www.koreaherald.com/rss/020200000000.xml'),
|
||||
('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
|
||||
('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
|
||||
('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
|
||||
('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
|
||||
('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
|
||||
]
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download KoreaHerald
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class KoreaHerald(BasicNewsRecipe):
|
||||
title = u'KoreaHerald'
|
||||
language = 'en'
|
||||
description = u'Korea Herald News articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 15
|
||||
recursions = 3
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [
|
||||
dict(id=['contentLeft', '_article'])
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
|
||||
dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('National','http://www.koreaherald.com/rss/020100000000.xml'),
|
||||
('Business','http://www.koreaherald.com/rss/020200000000.xml'),
|
||||
('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
|
||||
('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
|
||||
('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
|
||||
('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
|
||||
('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
|
||||
]
|
||||
|
@ -1,7 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
title = 'Kansascity Star'
|
||||
title = 'Kansas City Star'
|
||||
language = 'en'
|
||||
__author__ = 'TonytheBookworm'
|
||||
description = 'www.kansascity.com feed'
|
||||
|
37
recipes/kyungyhang
Normal file
37
recipes/kyungyhang
Normal file
@ -0,0 +1,37 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download The Kyungyhang
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Kyungyhang(BasicNewsRecipe):
|
||||
title = u'Kyungyhang'
|
||||
language = 'ko'
|
||||
description = u'The Kyungyhang Shinmun articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 20
|
||||
recursions = 2
|
||||
max_articles_per_feed = 20
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs ={'class':['article_title_wrap']}),
|
||||
dict(name='div', attrs ={'class':['article_txt']})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(id={'sub_bottom'})
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(id={'TdHot'}),
|
||||
dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}),
|
||||
dict(name='dl', attrs={'class':['CL']}),
|
||||
dict(name='ul', attrs={'class':['tab']}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('All News','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
|
||||
]
|
@ -1,51 +1,77 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
|
||||
|
||||
'''
|
||||
http://www.repubblica.it/
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaRepubblica(BasicNewsRecipe):
|
||||
__author__ = 'Lorenzo Vigentini, Gabriele Marini'
|
||||
description = 'Italian daily newspaper'
|
||||
|
||||
cover_url = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif'
|
||||
title = u'La Repubblica'
|
||||
publisher = 'Gruppo editoriale L\'Espresso'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
|
||||
language = 'it'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
recursion = 10
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
title = 'La Repubblica'
|
||||
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
|
||||
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
|
||||
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
|
||||
publisher = 'Gruppo editoriale L\'Espresso'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
language = 'it'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
oldest_article = 5
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
publication_type = 'newspaper'
|
||||
articles_are_obfuscated = True
|
||||
temp_files = []
|
||||
extra_css = """
|
||||
img{display: block}
|
||||
"""
|
||||
|
||||
remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
|
||||
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE), lambda match: '<head><title>'),
|
||||
(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE), lambda match: '</title></head>')
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
link = article.get('id', article.get('guid', None))
|
||||
if link is None:
|
||||
return article
|
||||
return link
|
||||
link = BasicNewsRecipe.get_article_url(self, article)
|
||||
if link and not '.repubblica.it/' in link:
|
||||
link2 = article.get('id', article.get('guid', None))
|
||||
if link2:
|
||||
link = link2
|
||||
return link.rpartition('?')[0]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'articolo'}),
|
||||
dict(name='div', attrs={'class':'body-text'}),
|
||||
# dict(name='div', attrs={'class':'page-content'}),
|
||||
def get_obfuscated_article(self, url):
|
||||
count = 0
|
||||
while (count < 10):
|
||||
try:
|
||||
response = self.browser.open(url)
|
||||
html = response.read()
|
||||
count = 10
|
||||
except:
|
||||
print "Retrying download..."
|
||||
count += 1
|
||||
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||
self.temp_files[-1].write(html)
|
||||
self.temp_files[-1].close()
|
||||
return self.temp_files[-1].name
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'class':'articolo'}),
|
||||
dict(attrs={'class':'body-text'}),
|
||||
dict(name='p', attrs={'class':'disclaimer clearfix'}),
|
||||
dict(name='div', attrs={'id':'contA'})
|
||||
dict(attrs={'id':'contA'})
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link']),
|
||||
dict(name=['object','link','meta','iframe','embed']),
|
||||
dict(name='span',attrs={'class':'linkindice'}),
|
||||
dict(name='div', attrs={'class':'bottom-mobile'}),
|
||||
dict(name='div', attrs={'id':['rssdiv','blocco']}),
|
||||
@ -76,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe):
|
||||
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(['hgroup','deresponsabilizzazione','per']):
|
||||
item.name = 'div'
|
||||
item.attrs = []
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
|
75
recipes/lepoint.recipe
Normal file
75
recipes/lepoint.recipe
Normal file
@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
'''
|
||||
LePoint.fr
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class lepoint(BasicNewsRecipe):
|
||||
|
||||
title = 'Le Point'
|
||||
__author__ = 'calibre'
|
||||
description = 'Actualités'
|
||||
encoding = 'utf-8'
|
||||
publisher = 'LePoint.fr'
|
||||
category = 'news, France, world'
|
||||
language = 'fr'
|
||||
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
extra_css = '''
|
||||
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
.chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
|
||||
.info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':['entete_chroniqueur']}),
|
||||
dict(name='div', attrs={'class':['col_article']}),
|
||||
dict(name='div', attrs={'class':['signature_article']}),
|
||||
dict(name='div', attrs={'class':['util_font util_article']}),
|
||||
dict(name='div', attrs={'class':['util_article bottom']})
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['page_article']})]
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class':['util_article bottom']})
|
||||
|
||||
feeds = [
|
||||
(u'À la une', 'http://www.lepoint.fr/rss.xml'),
|
||||
('International', 'http://www.lepoint.fr/monde/rss.xml'),
|
||||
('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
|
||||
('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
|
||||
('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
|
||||
(u'Socièté', 'http://www.lepoint.fr/societe/rss.xml'),
|
||||
('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
|
||||
(u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
|
||||
('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
|
||||
(u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
|
||||
('Sport', 'http://www.lepoint.fr/sport/rss.xml')
|
||||
]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://www.lepoint.fr/images/commun/logo.png'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
masthead = None
|
||||
return masthead
|
73
recipes/lexpress.recipe
Normal file
73
recipes/lexpress.recipe
Normal file
@ -0,0 +1,73 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
'''
|
||||
Lexpress.fr
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class lepoint(BasicNewsRecipe):
|
||||
|
||||
title = 'L\'express'
|
||||
__author__ = 'calibre'
|
||||
description = 'Actualités'
|
||||
encoding = 'cp1252'
|
||||
publisher = 'LExpress.fr'
|
||||
category = 'Actualité, France, Monde'
|
||||
language = 'fr'
|
||||
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
extra_css = '''
|
||||
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
.current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
#contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
.entete { font-weiht:bold;}
|
||||
'''
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':['barre-outil-fb']}),
|
||||
dict(name='div', attrs={'class':['barre-outils']}),
|
||||
dict(id='bloc-sommaire'),
|
||||
dict(id='footer-article')
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['bloc-article']})]
|
||||
|
||||
remove_tags_after = dict(id='content-article')
|
||||
|
||||
feeds = [
|
||||
(u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'),
|
||||
('International', 'http://www.lexpress.fr/rss/monde.xml'),
|
||||
('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'),
|
||||
(u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'),
|
||||
(u'Envronnement', 'http://www.lexpress.fr/rss/environnement.xml'),
|
||||
('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
|
||||
(u'Socièté', 'http://www.lexpress.fr/rss/societe.xml'),
|
||||
('Politique', 'http://www.lexpress.fr/rss/politique.xml'),
|
||||
(u'Médias', 'http://www.lexpress.fr/rss/medias.xml'),
|
||||
('Culture', 'http://www.lexpress.fr/rss/culture.xml'),
|
||||
('Sport', 'http://www.lexpress.fr/rss/sport.xml')
|
||||
]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
masthead = None
|
||||
return masthead
|
@ -9,39 +9,72 @@ liberation.fr
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Liberation(BasicNewsRecipe):
|
||||
|
||||
title = u'Liberation'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from France'
|
||||
language = 'fr'
|
||||
__author__ = 'calibre'
|
||||
description = 'Actualités'
|
||||
category = 'Actualités, France, Monde'
|
||||
language = 'fr'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
html2lrf_options = ['--base-font-size', '10']
|
||||
extra_css = '''
|
||||
h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
|
||||
h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1')
|
||||
#,dict(name='div', attrs={'class':'object-content text text-item'})
|
||||
,dict(name='div', attrs={'class':'article'})
|
||||
#,dict(name='div', attrs={'class':'articleContent'})
|
||||
,dict(name='div', attrs={'class':'entry'})
|
||||
]
|
||||
remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
|
||||
dict(name='div', attrs={'class':'article'})
|
||||
,dict(name='div', attrs={'class':'text-article m-bot-s1'})
|
||||
,dict(name='div', attrs={'class':'entry'})
|
||||
,dict(name='div', attrs={'class':'col_contenu'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
|
||||
,dict(name='p',attrs={'class':['chapo']})
|
||||
,dict(id='_twitter_facebook')
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='p', attrs={'class':'clear'})
|
||||
,dict(name='ul', attrs={'class':'floatLeft clear'})
|
||||
,dict(name='div', attrs={'class':'clear floatRight'})
|
||||
,dict(name='object')
|
||||
,dict(name='div', attrs={'class':'toolbox'})
|
||||
,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
|
||||
#,dict(name='div', attrs={'class':'clear block block-call-items'})
|
||||
,dict(name='div', attrs={'class':'block-content'})
|
||||
dict(name='iframe')
|
||||
,dict(name='a', attrs={'class':'lnk-comments'})
|
||||
,dict(name='div', attrs={'class':'toolbox'})
|
||||
,dict(name='ul', attrs={'class':'share-box'})
|
||||
,dict(name='ul', attrs={'class':'tool-box'})
|
||||
,dict(name='ul', attrs={'class':'rub'})
|
||||
,dict(name='p',attrs={'class':['chapo']})
|
||||
,dict(name='p',attrs={'class':['tag']})
|
||||
,dict(name='div',attrs={'class':['blokLies']})
|
||||
,dict(name='div',attrs={'class':['alire']})
|
||||
,dict(id='_twitter_facebook')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'La une', u'http://www.liberation.fr/rss/laune')
|
||||
,(u'Monde' , u'http://www.liberation.fr/rss/monde')
|
||||
,(u'Sports', u'http://www.liberation.fr/rss/sports')
|
||||
(u'La une', u'http://rss.liberation.fr/rss/9/')
|
||||
,(u'Monde' , u'http://www.liberation.fr/rss/10/')
|
||||
,(u'Économie', u'http://www.liberation.fr/rss/13/')
|
||||
,(u'Politiques', u'http://www.liberation.fr/rss/11/')
|
||||
,(u'Société', u'http://www.liberation.fr/rss/12/')
|
||||
,(u'Cinéma', u'http://www.liberation.fr/rss/58/')
|
||||
,(u'Écran', u'http://www.liberation.fr/rss/53/')
|
||||
,(u'Sports', u'http://www.liberation.fr/rss/12/')
|
||||
]
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
masthead = None
|
||||
return masthead
|
||||
|
@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
|
||||
publication_type = 'newspaper'
|
||||
delay = 1
|
||||
remove_empty_feeds = True
|
||||
cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg')
|
||||
cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg')
|
||||
masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
|
||||
extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em}
|
||||
|
27
recipes/merco_press.recipe
Normal file
27
recipes/merco_press.recipe
Normal file
@ -0,0 +1,27 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MercoPress(BasicNewsRecipe):
|
||||
title = u'Merco Press'
|
||||
description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America."
|
||||
cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif'
|
||||
|
||||
__author__ = 'Russell Phillips'
|
||||
language = 'en'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
|
||||
remove_tags = [dict(name='a')]
|
||||
|
||||
feeds = [('Antarctica', 'http://en.mercopress.com/rss/antarctica'),
|
||||
('Argentina', 'http://en.mercopress.com/rss/argentina'),
|
||||
('Brazil', 'http://en.mercopress.com/rss/brazil'),
|
||||
('Falkland Islands', 'http://en.mercopress.com/rss/falkland-islands'),
|
||||
('International News', 'http://en.mercopress.com/rss/international'),
|
||||
('Latin America', 'http://en.mercopress.com/rss/latin-america'),
|
||||
('Mercosur', 'http://en.mercopress.com/rss/mercosur'),
|
||||
('Paraguay', 'http://en.mercopress.com/rss/paraguay'),
|
||||
('United States', 'http://en.mercopress.com/rss/united-states'),
|
||||
('Uruguay://en.mercopress.com/rss/uruguay')]
|
@ -1,9 +1,21 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
|
||||
''' Version 1.2, updated cover image to match the changed website.
|
||||
added info date on title
|
||||
version 1.4 Updated tags, delay and added autoclean 22-09-2011
|
||||
version 1.5 Changes due to changes in site
|
||||
version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
|
||||
Added som processing on pictures
|
||||
Removed links in html
|
||||
Removed extre white characters
|
||||
changed handling of self closing span
|
||||
'''
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Metro Nieuws NL'
|
||||
# Version 1.2, updated cover image to match the changed website.
|
||||
# added info date on title
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
__author__ = u'DrMerry'
|
||||
@ -11,8 +23,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
language = u'nl'
|
||||
simultaneous_downloads = 5
|
||||
#delay = 1
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//div[@class="article-image-caption-2column"]|//div[@id="date"]'
|
||||
#auto_cleanup = True
|
||||
#auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
|
||||
timefmt = ' [%A, %d %b %Y]'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
@ -20,22 +32,73 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
|
||||
publication_type = 'newspaper'
|
||||
remove_tags_before = dict(name='div', attrs={'id':'date'})
|
||||
remove_tags_after = dict(name='div', attrs={'id':'column-1-3'})
|
||||
remove_tags_after = dict(name='div', attrs={'class':'article-body'})
|
||||
encoding = 'utf-8'
|
||||
extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph, p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}'
|
||||
remove_attributes = ['style', 'font', 'width', 'height']
|
||||
use_embedded_content = False
|
||||
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
|
||||
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
|
||||
.article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
|
||||
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
|
||||
.article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\
|
||||
div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\
|
||||
p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \
|
||||
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
|
||||
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
|
||||
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
|
||||
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}),
|
||||
dict(name='div', attrs={'id':['date']}),
|
||||
dict(name='h1', attrs={'class':['title']}),
|
||||
dict(name='h2', attrs={'class':['subtitle']})]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
|
||||
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
|
||||
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
|
||||
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}),
|
||||
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
|
||||
dict(name='iframe')]
|
||||
|
||||
preprocess_regexps = [(re.compile(r'(<p>( |\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->)', re.DOTALL|re.IGNORECASE),lambda match: ''),
|
||||
(re.compile(r'( |\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
|
||||
(re.compile(r'([\s>])([^\s>]+)(<span[^>]+) />', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: match.group(1) + match.group(3) + '>' + match.group(2) + '</span>'),
|
||||
]
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||
iurl = tag['src']
|
||||
img = Image()
|
||||
img.open(iurl)
|
||||
#width, height = img.size
|
||||
#print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
|
||||
img.trim(0)
|
||||
img.save(iurl)
|
||||
'''
|
||||
#width, height = img.size
|
||||
#print '***TRIMMED img width is: ', width, 'height is: ', height
|
||||
left=0
|
||||
top=0
|
||||
border_color='#ffffff'
|
||||
width, height = img.size
|
||||
#print '***retrieved img width is: ', width, 'height is: ', height
|
||||
height_correction = 1.17
|
||||
canvas = create_canvas(width, height*height_correction,border_color)
|
||||
canvas.compose(img, left, top)
|
||||
#img = canvas
|
||||
canvas.save(iurl)
|
||||
#width, height = canvas.size
|
||||
#print '***NEW img width is: ', width, 'height is: ', height
|
||||
'''
|
||||
return soup
|
||||
|
||||
feeds = [
|
||||
(u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
|
||||
(u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
|
||||
(u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
|
||||
(u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
|
||||
(u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
|
||||
(u'Buitenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-4'),
|
||||
(u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
|
||||
(u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
|
||||
(u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),
|
||||
|
@ -5,30 +5,46 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
description = 'News as provide by The Metro -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
|
||||
|
||||
no_stylesheets = True
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 25
|
||||
max_articles_per_feed = 20
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
|
||||
preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
|
||||
#preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
language = 'en_GB'
|
||||
|
||||
|
||||
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
|
||||
|
||||
extra_css = 'h2 {font: sans-serif medium;}'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
|
||||
dict(attrs={'class':['img-cnt figure']}),
|
||||
dict(attrs={'class':['art-img']}),
|
||||
|
||||
dict(name='div', attrs={'class':'art-lft'})
|
||||
dict(name='div', attrs={'class':'art-lft'}),
|
||||
dict(name='p')
|
||||
]
|
||||
remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
|
||||
'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
|
||||
dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
|
||||
]
|
||||
dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
|
||||
,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
|
||||
]
|
||||
feeds = [
|
||||
(u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
|
||||
|
||||
extra_css = '''
|
||||
body {font: sans-serif medium;}'
|
||||
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
|
||||
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
|
||||
span{ font-size:9.5px; font-weight:bold;font-style:italic}
|
||||
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
|
||||
'''
|
||||
|
@ -4,21 +4,31 @@ __copyright__ = '2010-2011, Eddie Lau'
|
||||
# Region - Hong Kong, Vancouver, Toronto
|
||||
__Region__ = 'Hong Kong'
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
# please replace the following "True" with "False". (Default: True)
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to true if your device supports display of CJK titles
|
||||
# Turn below to True if your device supports display of CJK titles (Default: False)
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images
|
||||
# Set it to False if you want to skip images (Default: True)
|
||||
__KeepImages__ = True
|
||||
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
|
||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
|
||||
__UseLife__ = True
|
||||
# (HK only) if __UseLife__ is true, turn this on if you want to include the column section
|
||||
__InclCols__ = False
|
||||
# (HK only) It is to disable premium content (Default: False)
|
||||
__InclPremium__ = False
|
||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
|
||||
__ParsePFF__ = True
|
||||
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
||||
__HiResImg__ = False
|
||||
# Override the date returned by the program if specifying a YYYYMMDD below
|
||||
__Date__ = ''
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/09/21: fetching "column" section is made optional. Default is False
|
||||
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
|
||||
2011/10/19: fix a bug in txt source parsing
|
||||
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
||||
2011/10/04: option to get hi-res photos for the articles
|
||||
2011/09/21: fetching "column" section is made optional.
|
||||
2011/09/18: parse "column" section stuff from source text file directly.
|
||||
2011/09/07: disable "column" section as it is no longer offered free.
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
@ -42,7 +52,7 @@ Change Log:
|
||||
2010/10/31: skip repeated articles in section pages
|
||||
'''
|
||||
|
||||
import os, datetime, re
|
||||
import os, datetime, re, mechanize
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
@ -56,7 +66,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
@ -67,7 +77,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
dict(attrs={'class':['content']}), # for content from txt
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
|
||||
dict(attrs={'class':['images']}) # for images from txt
|
||||
]
|
||||
if __KeepImages__:
|
||||
@ -147,43 +157,6 @@ class MPRecipe(BasicNewsRecipe):
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
# minIdx = 10000
|
||||
# i0 = url.find('0')
|
||||
# if i0 >= 0 and i0 < minIdx:
|
||||
# minIdx = i0
|
||||
# i1 = url.find('1')
|
||||
# if i1 >= 0 and i1 < minIdx:
|
||||
# minIdx = i1
|
||||
# i2 = url.find('2')
|
||||
# if i2 >= 0 and i2 < minIdx:
|
||||
# minIdx = i2
|
||||
# i3 = url.find('3')
|
||||
# if i3 >= 0 and i0 < minIdx:
|
||||
# minIdx = i3
|
||||
# i4 = url.find('4')
|
||||
# if i4 >= 0 and i4 < minIdx:
|
||||
# minIdx = i4
|
||||
# i5 = url.find('5')
|
||||
# if i5 >= 0 and i5 < minIdx:
|
||||
# minIdx = i5
|
||||
# i6 = url.find('6')
|
||||
# if i6 >= 0 and i6 < minIdx:
|
||||
# minIdx = i6
|
||||
# i7 = url.find('7')
|
||||
# if i7 >= 0 and i7 < minIdx:
|
||||
# minIdx = i7
|
||||
# i8 = url.find('8')
|
||||
# if i8 >= 0 and i8 < minIdx:
|
||||
# minIdx = i8
|
||||
# i9 = url.find('9')
|
||||
# if i9 >= 0 and i9 < minIdx:
|
||||
# minIdx = i9
|
||||
return url
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
if __Region__ == 'Hong Kong':
|
||||
@ -201,13 +174,22 @@ class MPRecipe(BasicNewsRecipe):
|
||||
return dt_local
|
||||
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchday(self):
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
if __Region__ == 'Hong Kong':
|
||||
@ -240,18 +222,21 @@ class MPRecipe(BasicNewsRecipe):
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if __InclPremium__ == True:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
else:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclCols__ == True:
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
@ -260,15 +245,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
else:
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special- editorial
|
||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
if ed_articles:
|
||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
#if ed_articles:
|
||||
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
|
||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
@ -279,32 +265,46 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
# special - finance
|
||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
#if fin_articles:
|
||||
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
|
||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
# articles = self.parse_section(url)
|
||||
# if articles:
|
||||
# feeds.append((title, articles))
|
||||
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
#if ent_articles:
|
||||
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
|
||||
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
# special- columns
|
||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||
if col_articles:
|
||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||
elif __Region__ == 'Vancouver':
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
|
||||
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
|
||||
@ -348,6 +348,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
# replace the url to the print-friendly version
|
||||
if __ParsePFF__ == True:
|
||||
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
|
||||
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
|
||||
url = re.sub('%2F.*%2F', '/', url)
|
||||
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
|
||||
url = url.replace('%2Etxt', '_print.htm')
|
||||
url = url.replace('%5F', '_')
|
||||
else:
|
||||
url = url.replace('.htm', '_print.htm')
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
@ -356,6 +366,8 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
# parse from life.mingpao.com
|
||||
def parse_section2(self, url, keystr):
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
@ -366,9 +378,13 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
try:
|
||||
br.open_novisit(url)
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
except:
|
||||
print 'skipping a premium article'
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
@ -389,7 +405,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
|
||||
# parse from www.mingpaovan.com
|
||||
def parse_section3(self, url, baseUrl):
|
||||
self.get_fetchdate()
|
||||
@ -472,39 +488,152 @@ class MPRecipe(BasicNewsRecipe):
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# preprocess those .txt based files
|
||||
# preprocess those .txt and javascript based files
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
if url.rfind('ftp') == -1:
|
||||
return raw_html
|
||||
else:
|
||||
splitter = re.compile(r'\n') # Match non-digits
|
||||
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
||||
next_is_img_txt = False
|
||||
title_started = False
|
||||
met_article_start_char = False
|
||||
for item in splitter.split(raw_html):
|
||||
if item.startswith(u'\u3010'):
|
||||
met_article_start_char = True
|
||||
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||
else:
|
||||
if next_is_img_txt == False:
|
||||
if item.startswith('='):
|
||||
next_is_img_txt = True
|
||||
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
|
||||
new_html = raw_html
|
||||
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
|
||||
if url.rfind('_print.htm') <> -1:
|
||||
# javascript based file
|
||||
splitter = re.compile(r'\n')
|
||||
new_raw_html = '<html><head><title>Untitled</title></head>'
|
||||
new_raw_html = new_raw_html + '<body>'
|
||||
for item in splitter.split(raw_html):
|
||||
if item.startswith('var heading1 ='):
|
||||
heading = item.replace('var heading1 = \'', '')
|
||||
heading = heading.replace('\'', '')
|
||||
heading = heading.replace(';', '')
|
||||
new_raw_html = new_raw_html + '<div class="heading">' + heading
|
||||
if item.startswith('var heading2 ='):
|
||||
heading = item.replace('var heading2 = \'', '')
|
||||
heading = heading.replace('\'', '')
|
||||
heading = heading.replace(';', '')
|
||||
if heading <> '':
|
||||
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
|
||||
else:
|
||||
if met_article_start_char == False:
|
||||
if title_started == False:
|
||||
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
||||
title_started = True
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '<p>\n'
|
||||
new_raw_html = new_raw_html + '</div>'
|
||||
if item.startswith('var content ='):
|
||||
content = item.replace("var content = ", '')
|
||||
content = content.replace('\'', '')
|
||||
content = content.replace(';', '')
|
||||
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
|
||||
if item.startswith('var photocontent ='):
|
||||
photo = item.replace('var photocontent = \'', '')
|
||||
photo = photo.replace('\'', '')
|
||||
photo = photo.replace(';', '')
|
||||
photo = photo.replace('<tr>', '')
|
||||
photo = photo.replace('<td>', '')
|
||||
photo = photo.replace('</tr>', '')
|
||||
photo = photo.replace('</td>', '<br>')
|
||||
photo = photo.replace('class="photo"', '')
|
||||
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
|
||||
new_html = new_raw_html + '</body></html>'
|
||||
else:
|
||||
# .txt based file
|
||||
splitter = re.compile(r'\n') # Match non-digits
|
||||
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
||||
next_is_img_txt = False
|
||||
title_started = False
|
||||
met_article_start_char = False
|
||||
for item in splitter.split(raw_html):
|
||||
item = item.strip()
|
||||
if item.startswith(u'\u3010'):
|
||||
met_article_start_char = True
|
||||
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||
else:
|
||||
next_is_img_txt = False
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
return new_raw_html + '</div></body></html>'
|
||||
|
||||
if next_is_img_txt == False:
|
||||
if item.startswith("=@"):
|
||||
print 'skip movie link'
|
||||
elif item.startswith("=?"):
|
||||
next_is_img_txt = True
|
||||
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
|
||||
elif item.startswith('=='):
|
||||
next_is_img_txt = True
|
||||
if False:
|
||||
# TODO: check existence of .gif first
|
||||
newimg = '_' + item[2:].strip() + '.jpg'
|
||||
new_raw_html += '<img src="' + newimg + '" /><p>\n'
|
||||
else:
|
||||
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
|
||||
elif item.startswith('='):
|
||||
next_is_img_txt = True
|
||||
if False:
|
||||
# TODO: check existence of .gif first
|
||||
newimg = '_' + item[1:].strip() + '.jpg'
|
||||
new_raw_html += '<img src="' + newimg + '" /><p>\n'
|
||||
else:
|
||||
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
|
||||
else:
|
||||
if next_is_img_txt == False and met_article_start_char == False:
|
||||
if item <> '':
|
||||
if title_started == False:
|
||||
#print 'Title started at ', item
|
||||
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
||||
title_started = True
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '<p>\n'
|
||||
else:
|
||||
next_is_img_txt = False
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
new_html = new_raw_html + '</div></body></html>'
|
||||
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
|
||||
if __HiResImg__ == True:
|
||||
# TODO: add a _ in front of an image url
|
||||
if url.rfind('news.mingpao.com') > -1:
|
||||
imglist = re.findall('src="?.*?jpg"', new_html)
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
for img in imglist:
|
||||
gifimg = img.replace('jpg"', 'gif"')
|
||||
try:
|
||||
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
# find the location of the first _
|
||||
pos = img.find('_')
|
||||
if pos > -1:
|
||||
# if found, insert _ after the first _
|
||||
newimg = img[0:pos] + '_' + img[pos:]
|
||||
new_html = new_html.replace(img, newimg)
|
||||
else:
|
||||
# if not found, insert _ after "
|
||||
new_html = new_html.replace(img[1:], '"_' + img[1:])
|
||||
elif url.rfind('life.mingpao.com') > -1:
|
||||
imglist = re.findall('src=\'?.*?jpg\'', new_html)
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
#print 'Img list: ', imglist, '\n'
|
||||
for img in imglist:
|
||||
#print 'Found img: ', img
|
||||
gifimg = img.replace('jpg\'', 'gif\'')
|
||||
try:
|
||||
gifurl = re.sub(r'dailynews.*txt', '', url)
|
||||
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
pos = img.rfind('/')
|
||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||
new_html = new_html.replace(img, newimg)
|
||||
# repeat with src quoted by double quotes, for text parsed from src txt
|
||||
imglist = re.findall('src="?.*?jpg"', new_html)
|
||||
for img in imglist:
|
||||
#print 'Found img: ', img
|
||||
gifimg = img.replace('jpg"', 'gif"')
|
||||
try:
|
||||
#print 'url', url
|
||||
pos = url.rfind('/')
|
||||
gifurl = url[:pos+1]
|
||||
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
|
||||
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
pos = img.find('"')
|
||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||
#print 'Use hi-res img', newimg
|
||||
new_html = new_html.replace(img, newimg)
|
||||
return new_html
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
@ -513,7 +642,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
for item in soup.findAll(stype=True):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
|
||||
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
|
@ -8,7 +8,7 @@ class AdvancedUserRecipe1294342201(BasicNewsRecipe):
|
||||
title = u'New London Day'
|
||||
__author__ = 'Being'
|
||||
description = 'State, local and business news from New London, CT'
|
||||
language = 'en_GB'
|
||||
language = 'en'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 200
|
||||
|
||||
|
@ -10,9 +10,8 @@ class AdvancedUserRecipe1286819935(BasicNewsRecipe):
|
||||
remove_attributes = ['style']
|
||||
language = 'ru'
|
||||
|
||||
feeds = [(u'Articles', u'http://www.novayagazeta.ru/rss_number.xml')]
|
||||
feeds = [(u'Articles', u'http://www.novayagazeta.ru/rss/all.xml')]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?print=true'
|
||||
|
||||
return '%s%s' % (url, '?print=1')
|
||||
|
20
recipes/omgubuntu.recipe
Normal file
20
recipes/omgubuntu.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class BasicUserRecipe1318619832(BasicNewsRecipe):
|
||||
title = u'OmgUbuntu'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')]
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
masthead = None
|
||||
return masthead
|
17
recipes/penguin_news.recipe
Normal file
17
recipes/penguin_news.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MercoPress(BasicNewsRecipe):
|
||||
title = u'Penguin News'
|
||||
description = u"Penguin News: the Falkland Islands' only newspaper."
|
||||
cover_url = 'http://www.penguin-news.com/templates/rt_syndicate_j15/images/logo/light/logo1.png'
|
||||
language = 'en'
|
||||
|
||||
__author__ = 'Russell Phillips'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
|
||||
|
||||
feeds = [(u'Penguin News - Falkland Islands', u'http://www.penguin-news.com/index.php?format=feed&type=rss')]
|
47
recipes/phoronix.recipe
Normal file
47
recipes/phoronix.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
|
||||
'''
|
||||
Fetch phoronix.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class cdnet(BasicNewsRecipe):
|
||||
|
||||
title = 'Phoronix'
|
||||
__author__ = 'calibre'
|
||||
description = 'Actualités Phoronix'
|
||||
encoding = 'utf-8'
|
||||
publisher = 'Phoronix.com'
|
||||
category = 'news, IT, linux'
|
||||
language = 'en'
|
||||
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 25
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
extra_css = '''
|
||||
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
remove_tags = []
|
||||
|
||||
remove_tags_before = dict(id='phxcms_content_phx')
|
||||
remove_tags_after = dict(name='div', attrs={'class':'KonaBody'})
|
||||
|
||||
feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
19
recipes/real_world_economics_review.recipe
Normal file
19
recipes/real_world_economics_review.recipe
Normal file
@ -0,0 +1,19 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Real_world_economics_review(BasicNewsRecipe):
|
||||
title = u'Real-world economis review blog'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
__author__ = 'Julio Map'
|
||||
language = 'en'
|
||||
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = dict(name='div', attrs={'id':'main'})
|
||||
remove_tags = [dict(name='div', attrs={'id':'postpath'}),
|
||||
dict(name='div', attrs={'class':'robots-nocontent sd-block sd-social sd-social-icon-text sd-sharing'}),
|
||||
dict(name='div', attrs={'class':'sharedaddy sd-sharing-enabled'})
|
||||
]
|
||||
|
||||
feeds = [(u'Real-World Economics Review Blog', u'http://rwer.wordpress.com/feed/')]
|
29
recipes/revista_piaui.recipe
Normal file
29
recipes/revista_piaui.recipe
Normal file
@ -0,0 +1,29 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RevistaPiaui(BasicNewsRecipe):
|
||||
title = u'Revista piau\xed'
|
||||
language = 'pt_BR'
|
||||
__author__ = u'Eduardo Gustini Simões'
|
||||
oldest_article = 31
|
||||
max_articles_per_feed = 50
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Edi\xe7\xe3o Atual', u'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')]
|
||||
|
||||
def parse_feeds (self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
soup = self.index_to_soup('http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')
|
||||
itemTitle = article.title.partition('|')[0].rstrip()
|
||||
item = soup.find(text=itemTitle)
|
||||
articleDescription = item.parent.parent.description.string.partition('<br />')[2]
|
||||
article.summary = articleDescription
|
||||
|
||||
return feeds
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
h2 = soup.find('h2')
|
||||
h2.string.replaceWith(h2.string.partition('|')[0].rstrip())
|
||||
h2.replaceWith(h2.prettify() + '<p><em>' + article.summary + '</em></p><p><em>' + ' posted at ' + article.localtime.strftime('%d-%m-%Y') + '</em></p>')
|
@ -27,12 +27,12 @@ class ScienceAAS(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.LOGIN)
|
||||
br.select_form(name='registered_users_form')
|
||||
br.select_form(nr=1)
|
||||
br['username'] = self.username
|
||||
br['code' ] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'LegacyContent'}) ]
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'content-block'}) ]
|
||||
|
||||
feeds = [(u"Science: Current Issue", u'http://www.sciencemag.org/rss/current.xml')]
|
||||
|
22
recipes/silicon_republic.recipe
Normal file
22
recipes/silicon_republic.recipe
Normal file
@ -0,0 +1,22 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Neil Grogan'
|
||||
#
|
||||
# Silicon Republic Recipe
|
||||
#
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SiliconRepublic(BasicNewsRecipe):
|
||||
title = u'Silicon Republic'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
__author__ = u'Neil Grogan'
|
||||
language = 'en_IE'
|
||||
|
||||
remove_tags = [dict(attrs={'class':['thumb','txt','compactbox','icons','catlist','catlistinner','taglist','taglistinner','social','also-in','also-in-inner','also-in-footer','zonek-dfp','paneladvert','rcadvert','panel','h2b']}),
|
||||
dict(id=['header','logo','header-right','sitesearch','rsslinks','topnav','topvideos','topvideos-list','topnews','topnews-list','slideshow','slides','compactheader','compactnews','compactfeatures','article-type','contactlinks-header','banner-zone-k-dfp','footer-related','directory-services','also-in-section','featuredrelated1','featuredrelated2','featuredrelated3','featuredrelated4','advert2-dfp']),
|
||||
dict(name=['script', 'style'])]
|
||||
|
||||
|
||||
feeds = [(u'News', u'http://www.siliconrepublic.com/feeds/')]
|
||||
|
@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Sueddeutsche(BasicNewsRecipe):
|
||||
|
||||
title = u'Süddeutsche'
|
||||
title = u'sueddeutsche.de'
|
||||
description = 'News from Germany'
|
||||
__author__ = 'Oliver Niesner and Armin Geller'
|
||||
use_embedded_content = False
|
||||
@ -62,7 +62,7 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
|
||||
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
|
||||
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
|
||||
(u'München&Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
|
||||
(u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
|
||||
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
|
||||
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
|
||||
(u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
|
||||
@ -75,7 +75,7 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
(u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
|
||||
(u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
|
||||
(u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
|
||||
]
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
main, sep, id = url.rpartition('/')
|
||||
|
@ -3,7 +3,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TelepolisNews(BasicNewsRecipe):
|
||||
title = u'Telepolis (News+Artikel)'
|
||||
title = u'Telepolis'
|
||||
__author__ = 'syntaxis'
|
||||
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
|
||||
description = 'News from Telepolis'
|
||||
@ -15,11 +15,8 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
encoding = "utf-8"
|
||||
language = 'de'
|
||||
|
||||
|
||||
remove_empty_feeds = True
|
||||
|
||||
|
||||
|
||||
keep_only_tags = [dict(name = 'div',attrs={'class':'head'}),dict(name = 'div',attrs={'class':'leftbox'}),dict(name='td',attrs={'class':'strict'})]
|
||||
remove_tags = [ dict(name='td',attrs={'class':'blogbottom'}),
|
||||
dict(name='div',attrs={'class':'forum'}), dict(name='div',attrs={'class':'social'}),dict(name='div',attrs={'class':'blog-letter p-news'}),
|
||||
@ -28,7 +25,6 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
|
||||
remove_tags_after = [dict(name='span', attrs={'class':['breadcrumb']})]
|
||||
|
||||
|
||||
feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]
|
||||
|
||||
html2lrf_options = [
|
||||
@ -39,8 +35,7 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
return soup
|
@ -12,21 +12,18 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class DailyTelegraph(BasicNewsRecipe):
|
||||
title = u'The Australian'
|
||||
__author__ = u'Matthew Briggs and Sujata Raman'
|
||||
description = u'National broadsheet newspaper from down under - colloquially known as The Oz'
|
||||
description = (u'National broadsheet newspaper from down under - colloquially known as The Oz'
|
||||
'. You will need to have a subscription to '
|
||||
'http://www.theaustralian.com.au to get full articles.')
|
||||
language = 'en_AU'
|
||||
|
||||
oldest_article = 2
|
||||
needs_subscription = 'optional'
|
||||
max_articles_per_feed = 30
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , 'news, Australia'
|
||||
, '--publisher' , title
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'story'})]
|
||||
|
||||
#remove_tags = [dict(name=['object','link'])]
|
||||
@ -67,6 +64,19 @@ class DailyTelegraph(BasicNewsRecipe):
|
||||
(u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
|
||||
(u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
if self.username and self.password:
|
||||
br.open('http://www.theaustralian.com.au')
|
||||
br.select_form(nr=0)
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
raw = br.submit().read()
|
||||
if '>log out' not in raw.lower():
|
||||
raise ValueError('Failed to log in to www.theaustralian.com.au'
|
||||
' are your username and password correct?')
|
||||
return br
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.id
|
||||
|
||||
@ -76,14 +86,4 @@ class DailyTelegraph(BasicNewsRecipe):
|
||||
|
||||
#return br.geturl()
|
||||
|
||||
def get_cover_url(self):
|
||||
|
||||
href = 'http://www.theaustralian.news.com.au/'
|
||||
|
||||
soup = self.index_to_soup(href)
|
||||
img = soup.find('img',alt ="AUS HP promo digital2")
|
||||
print img
|
||||
if img :
|
||||
cover_url = img['src']
|
||||
|
||||
return cover_url
|
||||
|
@ -1,37 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
thescotsman.scotsman.com
|
||||
www.scotsman.com/the-scotsman
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheScotsman(BasicNewsRecipe):
|
||||
title = u'The Scotsman'
|
||||
title = 'The Scotsman'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Scotland'
|
||||
oldest_article = 7
|
||||
publisher = 'Johnston Publishing Ltd.'
|
||||
category = 'news, politics, Scotland, UK'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'en_GB'
|
||||
|
||||
simultaneous_downloads = 1
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'viewarticle'})]
|
||||
remove_tags = [
|
||||
dict(name='div' , attrs={'class':'viewarticlepanel' })
|
||||
]
|
||||
|
||||
language = 'en_GB'
|
||||
encoding = 'utf-8'
|
||||
publication_type = 'newspaper'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.scotsman.com/webimage/swts_thescotsman_image_e_7_25526!image/3142543874.png_gen/derivatives/default/3142543874.png'
|
||||
extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'
|
||||
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'editorialSection'})]
|
||||
remove_tags_after = dict(attrs={'class':'socialBookmarkPanel'})
|
||||
remove_tags = [
|
||||
dict(name=['meta','iframe','object','embed','link']),
|
||||
dict(attrs={'class':['secondaryArticlesNav','socialBookmarkPanel']}),
|
||||
dict(attrs={'id':'relatedArticles'})
|
||||
]
|
||||
remove_attributes = ['lang']
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
feeds = [
|
||||
(u'Latest National News', u'http://thescotsman.scotsman.com/getFeed.aspx?Format=rss§ionid=4068'),
|
||||
('UK', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7071&format=rss'),
|
||||
('Scotland', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7042&format=rss'),
|
||||
('International', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7000&format=rss'),
|
||||
('Politics', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6990&format=rss'),
|
||||
('Entertainment', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7010&format=rss'),
|
||||
('Features', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6996&format=rss'),
|
||||
('Opinion', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7074&format=rss'),
|
||||
('Latest News' , 'http://www.scotsman.com/cmlink/1.957140' ),
|
||||
('UK' , 'http://www.scotsman.com/cmlink/1.957142' ),
|
||||
('Scotland' , 'http://www.scotsman.com/cmlink/1.957141' ),
|
||||
('International', 'http://www.scotsman.com/cmlink/1.957143' ),
|
||||
('Politics' , 'http://www.scotsman.com/cmlink/1.957044' ),
|
||||
('Arts' , 'http://www.scotsman.com/cmlink/1.1804825'),
|
||||
('Entertainment', 'http://www.scotsman.com/cmlink/1.957053' ),
|
||||
('Sports' , 'http://www.scotsman.com/cmlink/1.957151' ),
|
||||
('Business' , 'http://www.scotsman.com/cmlink/1.957156' ),
|
||||
('Features' , 'http://www.scotsman.com/cmlink/1.957149' ),
|
||||
('Opinion' , 'http://www.scotsman.com/cmlink/1.957054' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class USAToday(BasicNewsRecipe):
|
||||
|
||||
title = 'USA Today'
|
||||
__author__ = 'Kovid Goyal'
|
||||
oldest_article = 1
|
||||
publication_type = 'newspaper'
|
||||
timefmt = ''
|
||||
max_articles_per_feed = 20
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
extra_css = '.headline {text-align: left;}\n \
|
||||
.byline {font-family: monospace; \
|
||||
text-align: left; \
|
||||
margin-bottom: 1em;}\n \
|
||||
.image {text-align: center;}\n \
|
||||
.caption {text-align: center; \
|
||||
font-size: smaller; \
|
||||
font-style: italic}\n \
|
||||
.credit {text-align: right; \
|
||||
margin-bottom: 0em; \
|
||||
font-size: smaller;}\n \
|
||||
.articleBody {text-align: left;}\n '
|
||||
#simultaneous_downloads = 1
|
||||
title = 'USA Today'
|
||||
__author__ = 'calibre'
|
||||
description = 'newspaper'
|
||||
encoding = 'utf-8'
|
||||
publisher = 'usatoday.com'
|
||||
category = 'news, usa'
|
||||
language = 'en'
|
||||
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
extra_css = '''
|
||||
h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
#post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
#post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
|
||||
feeds = [
|
||||
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
||||
('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
|
||||
@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe):
|
||||
('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
|
||||
('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
|
||||
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
|
||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'story'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(attrs={'class':[
|
||||
'share',
|
||||
'reprints',
|
||||
'inline-h3',
|
||||
'info-extras',
|
||||
'info-extras rounded',
|
||||
'inset',
|
||||
'ppy-outer',
|
||||
'ppy-caption',
|
||||
'comments',
|
||||
@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe):
|
||||
'tags',
|
||||
'bottom-tools',
|
||||
'sponsoredlinks',
|
||||
'corrections'
|
||||
]}),
|
||||
dict(name='ul', attrs={'class':'inside-copy'}),
|
||||
dict(id=['pluck']),
|
||||
]
|
||||
dict(id=['updated']),
|
||||
dict(id=['post-date-updated'])
|
||||
]
|
||||
|
||||
|
||||
def get_masthead_url(self):
|
||||
|
17
recipes/wow.recipe
Normal file
17
recipes/wow.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class WoW(BasicNewsRecipe):
|
||||
title = u'WoW Insider'
|
||||
language = 'en'
|
||||
__author__ = 'Krittika Goyal'
|
||||
oldest_article = 1 #days
|
||||
max_articles_per_feed = 25
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [
|
||||
('WoW',
|
||||
'http://wow.joystiq.com/rss.xml')
|
||||
]
|
68
recipes/zdnet.fr.recipe
Normal file
68
recipes/zdnet.fr.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
|
||||
'''
|
||||
Fetch zdnet.fr
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class zdnet(BasicNewsRecipe):
|
||||
|
||||
title = 'ZDNet.fr'
|
||||
__author__ = 'calibre'
|
||||
description = 'Actualités'
|
||||
encoding = 'utf-8'
|
||||
publisher = 'ZDNet.fr'
|
||||
category = 'Actualité, Informatique, IT'
|
||||
language = 'fr'
|
||||
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
extra_css = '''
|
||||
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
.contentmetadata p {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
#content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':['toolbox']}),
|
||||
dict(name='div', attrs={'class':['clear clearfix']}),
|
||||
dict(id='emailtoafriend'),
|
||||
dict(id='storyaudio'),
|
||||
dict(id='fbtwContainer'),
|
||||
dict(name='h5')
|
||||
]
|
||||
|
||||
remove_tags_before = dict(id='leftcol')
|
||||
remove_tags_after = dict(id='content')
|
||||
|
||||
feeds = [
|
||||
('Informatique', 'http://www.zdnet.fr/feeds/rss/actualites/informatique/'),
|
||||
('Internet', 'http://www.zdnet.fr/feeds/rss/actualites/internet/'),
|
||||
('Telecom', 'http://www.zdnet.fr/feeds/rss/actualites/telecoms/')
|
||||
]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://www.zdnet.fr/images/base/logo.png'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(masthead)
|
||||
except:
|
||||
self.log("\nCover unavailable")
|
||||
masthead = None
|
||||
return masthead
|
BIN
resources/images/devices/boox.jpg
Normal file
BIN
resources/images/devices/boox.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.0 KiB |
@ -1,7 +1,7 @@
|
||||
<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:html="http://www.w3.org/1999/xhtml"
|
||||
xmlns="http://www.w3.org/1999/xhtml"
|
||||
xmlns:rtf="http://rtf2xml.sourceforge.net/"
|
||||
xmlns:c="calibre"
|
||||
extension-element-prefixes="c"
|
||||
@ -63,11 +63,16 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name = "para">
|
||||
<xsl:if test = "normalize-space(.) or child::*">
|
||||
<xsl:element name = "p">
|
||||
<xsl:call-template name = "para-content"/>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:element name = "p">
|
||||
<xsl:choose>
|
||||
<xsl:when test = "normalize-space(.) or child::*">
|
||||
<xsl:call-template name = "para-content"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:text> </xsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name = "para_off">
|
||||
@ -149,7 +154,7 @@
|
||||
<xsl:template match="rtf:doc-information" mode="header">
|
||||
<link rel="stylesheet" type="text/css" href="styles.css"/>
|
||||
<xsl:if test="not(rtf:title)">
|
||||
<title>unamed</title>
|
||||
<title>unnamed</title>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
@ -445,7 +450,10 @@
|
||||
|
||||
<xsl:template match = "rtf:field[@type='hyperlink']">
|
||||
<xsl:element name ="a">
|
||||
<xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute>
|
||||
<xsl:attribute name = "href">
|
||||
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
|
||||
<xsl:value-of select = "@link"/>
|
||||
</xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
@ -225,7 +225,10 @@ except:
|
||||
try:
|
||||
HOST=get_ip_address('wlan0')
|
||||
except:
|
||||
HOST='192.168.1.2'
|
||||
try:
|
||||
HOST=get_ip_address('ppp0')
|
||||
except:
|
||||
HOST='192.168.1.2'
|
||||
|
||||
PROJECT=os.path.basename(os.path.abspath('.'))
|
||||
|
||||
|
@ -336,7 +336,7 @@ class Build(Command):
|
||||
oinc = ['/Fo'+obj] if iswindows else ['-o', obj]
|
||||
cmd = [compiler] + cflags + ext.cflags + einc + sinc + oinc
|
||||
self.info(' '.join(cmd))
|
||||
subprocess.check_call(cmd)
|
||||
self.check_call(cmd)
|
||||
|
||||
dest = self.dest(ext)
|
||||
elib = self.lib_dirs_to_ldflags(ext.lib_dirs)
|
||||
@ -350,18 +350,32 @@ class Build(Command):
|
||||
else:
|
||||
cmd += objects + ext.extra_objs + ['-o', dest] + ldflags + ext.ldflags + elib + xlib
|
||||
self.info('\n\n', ' '.join(cmd), '\n\n')
|
||||
subprocess.check_call(cmd)
|
||||
self.check_call(cmd)
|
||||
if iswindows:
|
||||
#manifest = dest+'.manifest'
|
||||
#cmd = [MT, '-manifest', manifest, '-outputresource:%s;2'%dest]
|
||||
#self.info(*cmd)
|
||||
#subprocess.check_call(cmd)
|
||||
#self.check_call(cmd)
|
||||
#os.remove(manifest)
|
||||
for x in ('.exp', '.lib'):
|
||||
x = os.path.splitext(dest)[0]+x
|
||||
if os.path.exists(x):
|
||||
os.remove(x)
|
||||
|
||||
def check_call(self, *args, **kwargs):
|
||||
"""print cmdline if an error occured
|
||||
|
||||
If something is missing (qmake e.g.) you get a non-informative error
|
||||
self.check_call(qmc + [ext.name+'.pro'])
|
||||
so you would have to look a the source to see the actual command.
|
||||
"""
|
||||
try:
|
||||
subprocess.check_call(*args, **kwargs)
|
||||
except:
|
||||
cmdline = ' '.join(['"%s"' % (arg) if ' ' in arg else arg for arg in args[0]])
|
||||
print "Error while executing: %s\n" % (cmdline)
|
||||
raise
|
||||
|
||||
def build_qt_objects(self, ext):
|
||||
obj_pat = 'release\\*.obj' if iswindows else '*.o'
|
||||
objects = glob.glob(obj_pat)
|
||||
@ -380,8 +394,8 @@ class Build(Command):
|
||||
qmc = [QMAKE, '-o', 'Makefile']
|
||||
if iswindows:
|
||||
qmc += ['-spec', 'win32-msvc2008']
|
||||
subprocess.check_call(qmc + [ext.name+'.pro'])
|
||||
subprocess.check_call([make, '-f', 'Makefile'])
|
||||
self.check_call(qmc + [ext.name+'.pro'])
|
||||
self.check_call([make, '-f', 'Makefile'])
|
||||
objects = glob.glob(obj_pat)
|
||||
return list(map(self.a, objects))
|
||||
|
||||
@ -407,7 +421,7 @@ class Build(Command):
|
||||
cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\
|
||||
pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf]
|
||||
self.info(' '.join(cmd))
|
||||
subprocess.check_call(cmd)
|
||||
self.check_call(cmd)
|
||||
module = self.j(src_dir, self.b(dest))
|
||||
if self.newer(dest, [sbf]+qt_objects):
|
||||
mf = self.j(src_dir, 'Makefile')
|
||||
@ -417,7 +431,7 @@ class Build(Command):
|
||||
makefile.extra_include_dirs = ext.inc_dirs
|
||||
makefile.generate()
|
||||
|
||||
subprocess.check_call([make, '-f', mf], cwd=src_dir)
|
||||
self.check_call([make, '-f', mf], cwd=src_dir)
|
||||
shutil.copy2(module, dest)
|
||||
|
||||
def clean(self):
|
||||
@ -457,7 +471,7 @@ class BuildPDF2XML(Command):
|
||||
cmd += ['-I'+x for x in poppler_inc_dirs+magick_inc_dirs]
|
||||
cmd += ['/Fo'+obj, src]
|
||||
self.info(*cmd)
|
||||
subprocess.check_call(cmd)
|
||||
self.check_call(cmd)
|
||||
objects.append(obj)
|
||||
|
||||
if self.newer(dest, objects):
|
||||
@ -470,7 +484,7 @@ class BuildPDF2XML(Command):
|
||||
png_libs+magick_libs+poppler_libs+ft_libs+jpg_libs+pdfreflow_libs]
|
||||
cmd += ['/OUT:'+dest] + objects
|
||||
self.info(*cmd)
|
||||
subprocess.check_call(cmd)
|
||||
self.check_call(cmd)
|
||||
|
||||
self.info('Binary installed as', dest)
|
||||
|
||||
|
@ -20,17 +20,23 @@ for x in [
|
||||
EXCLUDES.extend(['--exclude', x])
|
||||
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
|
||||
|
||||
def get_rsync_pw():
|
||||
return open('/home/kovid/work/kde/conf/buildbot').read().partition(
|
||||
':')[-1].strip()
|
||||
|
||||
class Rsync(Command):
|
||||
|
||||
description = 'Sync source tree from development machine'
|
||||
|
||||
SYNC_CMD = ' '.join(BASE_RSYNC+SAFE_EXCLUDES+
|
||||
['rsync://{host}/work/{project}', '..'])
|
||||
['rsync://buildbot@{host}/work/{project}', '..'])
|
||||
|
||||
def run(self, opts):
|
||||
cmd = self.SYNC_CMD.format(host=HOST, project=PROJECT)
|
||||
env = dict(os.environ)
|
||||
env['RSYNC_PASSWORD'] = get_rsync_pw()
|
||||
self.info(cmd)
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
subprocess.check_call(cmd, shell=True, env=env)
|
||||
|
||||
|
||||
class Push(Command):
|
||||
@ -81,7 +87,8 @@ class VMInstaller(Command):
|
||||
|
||||
|
||||
def get_build_script(self):
|
||||
ans = '\n'.join(self.BUILD_PREFIX)+'\n\n'
|
||||
rs = ['export RSYNC_PASSWORD=%s'%get_rsync_pw()]
|
||||
ans = '\n'.join(self.BUILD_PREFIX + rs)+'\n\n'
|
||||
ans += ' && \\\n'.join(self.BUILD_RSYNC) + ' && \\\n'
|
||||
ans += ' && \\\n'.join(self.BUILD_CLEAN) + ' && \\\n'
|
||||
ans += ' && \\\n'.join(self.BUILD_BUILD) + ' && \\\n'
|
||||
|
2400
setup/iso_639/ca.po
2400
setup/iso_639/ca.po
File diff suppressed because it is too large
Load Diff
@ -7,15 +7,15 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: calibre\n"
|
||||
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"POT-Creation-Date: 2011-09-02 16:21+0000\n"
|
||||
"PO-Revision-Date: 2011-09-21 13:48+0000\n"
|
||||
"Last-Translator: Jellby <Unknown>\n"
|
||||
"POT-Creation-Date: 2011-09-27 14:31+0000\n"
|
||||
"PO-Revision-Date: 2011-10-22 22:04+0000\n"
|
||||
"Last-Translator: Fitoschido <fitoschido@gmail.com>\n"
|
||||
"Language-Team: Spanish <es@li.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2011-09-22 04:47+0000\n"
|
||||
"X-Generator: Launchpad (build 13996)\n"
|
||||
"X-Launchpad-Export-Date: 2011-10-23 05:13+0000\n"
|
||||
"X-Generator: Launchpad (build 14170)\n"
|
||||
|
||||
#. name for aaa
|
||||
msgid "Ghotuo"
|
||||
@ -5911,7 +5911,7 @@ msgstr "Gwahatike"
|
||||
|
||||
#. name for dai
|
||||
msgid "Day"
|
||||
msgstr "Day"
|
||||
msgstr "Día"
|
||||
|
||||
#. name for daj
|
||||
msgid "Daju; Dar Fur"
|
||||
@ -18231,7 +18231,7 @@ msgstr ""
|
||||
|
||||
#. name for nhi
|
||||
msgid "Nahuatl; Zacatlán-Ahuacatlán-Tepetzintla"
|
||||
msgstr "Náhuatl de Zacatlán; Ahuacatlán y Tepetzintla"
|
||||
msgstr "Náhuatl de Zacatlán-Ahuacatlán-Tepetzintla"
|
||||
|
||||
#. name for nhk
|
||||
msgid "Nahuatl; Isthmus-Cosoleacaque"
|
||||
|
@ -9,49 +9,49 @@ msgstr ""
|
||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||
"devel@lists.alioth.debian.org>\n"
|
||||
"POT-Creation-Date: 2011-09-27 14:31+0000\n"
|
||||
"PO-Revision-Date: 2011-09-27 18:23+0000\n"
|
||||
"Last-Translator: Kovid Goyal <Unknown>\n"
|
||||
"PO-Revision-Date: 2011-10-15 17:29+0000\n"
|
||||
"Last-Translator: Devilinside <Unknown>\n"
|
||||
"Language-Team: Hungarian <debian-l10n-hungarian@lists.d.o>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2011-09-28 04:50+0000\n"
|
||||
"X-Generator: Launchpad (build 14049)\n"
|
||||
"X-Launchpad-Export-Date: 2011-10-16 05:14+0000\n"
|
||||
"X-Generator: Launchpad (build 14124)\n"
|
||||
"X-Poedit-Country: HUNGARY\n"
|
||||
"Language: hu\n"
|
||||
"X-Poedit-Language: Hungarian\n"
|
||||
|
||||
#. name for aaa
|
||||
msgid "Ghotuo"
|
||||
msgstr ""
|
||||
msgstr "Ghotuo"
|
||||
|
||||
#. name for aab
|
||||
msgid "Alumu-Tesu"
|
||||
msgstr ""
|
||||
msgstr "Alumu-Tesu"
|
||||
|
||||
#. name for aac
|
||||
msgid "Ari"
|
||||
msgstr ""
|
||||
msgstr "Ari"
|
||||
|
||||
#. name for aad
|
||||
msgid "Amal"
|
||||
msgstr ""
|
||||
msgstr "Amal"
|
||||
|
||||
#. name for aae
|
||||
msgid "Albanian; Arbëreshë"
|
||||
msgstr ""
|
||||
msgstr "Albán; Arbëreshë"
|
||||
|
||||
#. name for aaf
|
||||
msgid "Aranadan"
|
||||
msgstr ""
|
||||
msgstr "Aranadan"
|
||||
|
||||
#. name for aag
|
||||
msgid "Ambrak"
|
||||
msgstr ""
|
||||
msgstr "Ambrak"
|
||||
|
||||
#. name for aah
|
||||
msgid "Arapesh; Abu'"
|
||||
msgstr ""
|
||||
msgstr "Arapesh; Abu'"
|
||||
|
||||
#. name for aai
|
||||
msgid "Arifama-Miniafia"
|
||||
@ -75,7 +75,7 @@ msgstr ""
|
||||
|
||||
#. name for aao
|
||||
msgid "Arabic; Algerian Saharan"
|
||||
msgstr ""
|
||||
msgstr "Arab; Algériai Szaharai"
|
||||
|
||||
#. name for aap
|
||||
msgid "Arára; Pará"
|
||||
@ -87,7 +87,7 @@ msgstr ""
|
||||
|
||||
#. name for aar
|
||||
msgid "Afar"
|
||||
msgstr "afar"
|
||||
msgstr "Afar"
|
||||
|
||||
#. name for aas
|
||||
msgid "Aasáx"
|
||||
@ -498,10 +498,9 @@ msgstr ""
|
||||
msgid "Tapei"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:281 src/trans.h:318
|
||||
#. name for afr
|
||||
msgid "Afrikaans"
|
||||
msgstr "afrikaans"
|
||||
msgstr "Afrikaans"
|
||||
|
||||
#. name for afs
|
||||
msgid "Creole; Afro-Seminole"
|
||||
@ -801,7 +800,7 @@ msgstr ""
|
||||
|
||||
#. name for aka
|
||||
msgid "Akan"
|
||||
msgstr "akan"
|
||||
msgstr "Akan"
|
||||
|
||||
#. name for akb
|
||||
msgid "Batak Angkola"
|
||||
@ -1015,10 +1014,9 @@ msgstr ""
|
||||
msgid "Amarag"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:283
|
||||
#. name for amh
|
||||
msgid "Amharic"
|
||||
msgstr "amhara"
|
||||
msgstr "Amhara"
|
||||
|
||||
#. name for ami
|
||||
msgid "Amis"
|
||||
@ -1425,10 +1423,9 @@ msgstr ""
|
||||
msgid "Arrarnta; Western"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:294
|
||||
#. name for arg
|
||||
msgid "Aragonese"
|
||||
msgstr "aragóniai"
|
||||
msgstr "Aragóniai"
|
||||
|
||||
#. name for arh
|
||||
msgid "Arhuaco"
|
||||
@ -1548,7 +1545,7 @@ msgstr ""
|
||||
|
||||
#. name for asm
|
||||
msgid "Assamese"
|
||||
msgstr "asszámi"
|
||||
msgstr "Asszámi"
|
||||
|
||||
#. name for asn
|
||||
msgid "Asuriní; Xingú"
|
||||
@ -1790,10 +1787,9 @@ msgstr ""
|
||||
msgid "Arabic; Uzbeki"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:283
|
||||
#. name for ava
|
||||
msgid "Avaric"
|
||||
msgstr "avar"
|
||||
msgstr "Avar"
|
||||
|
||||
#. name for avb
|
||||
msgid "Avau"
|
||||
@ -1805,7 +1801,7 @@ msgstr ""
|
||||
|
||||
#. name for ave
|
||||
msgid "Avestan"
|
||||
msgstr "aveszti"
|
||||
msgstr "Avesztai"
|
||||
|
||||
#. name for avi
|
||||
msgid "Avikam"
|
||||
@ -1945,7 +1941,7 @@ msgstr ""
|
||||
|
||||
#. name for ayc
|
||||
msgid "Aymara; Southern"
|
||||
msgstr ""
|
||||
msgstr "Ajmara; Déli"
|
||||
|
||||
#. name for ayd
|
||||
msgid "Ayabadhu"
|
||||
@ -1977,7 +1973,7 @@ msgstr ""
|
||||
|
||||
#. name for aym
|
||||
msgid "Aymara"
|
||||
msgstr "aymara"
|
||||
msgstr "Ajmara"
|
||||
|
||||
#. name for ayn
|
||||
msgid "Arabic; Sanaani"
|
||||
@ -1997,7 +1993,7 @@ msgstr ""
|
||||
|
||||
#. name for ayr
|
||||
msgid "Aymara; Central"
|
||||
msgstr ""
|
||||
msgstr "Ajmara; Közép"
|
||||
|
||||
#. name for ays
|
||||
msgid "Ayta; Sorsogon"
|
||||
@ -2025,12 +2021,11 @@ msgstr ""
|
||||
|
||||
#. name for azb
|
||||
msgid "Azerbaijani; South"
|
||||
msgstr ""
|
||||
msgstr "Azeri; Déli"
|
||||
|
||||
# src/trans.h:311
|
||||
#. name for aze
|
||||
msgid "Azerbaijani"
|
||||
msgstr "azeri"
|
||||
msgstr "Azeri"
|
||||
|
||||
#. name for azg
|
||||
msgid "Amuzgo; San Pedro Amuzgos"
|
||||
@ -2038,7 +2033,7 @@ msgstr ""
|
||||
|
||||
#. name for azj
|
||||
msgid "Azerbaijani; North"
|
||||
msgstr ""
|
||||
msgstr "Azeri; Északi"
|
||||
|
||||
#. name for azm
|
||||
msgid "Amuzgo; Ipalapa"
|
||||
@ -2090,7 +2085,7 @@ msgstr ""
|
||||
|
||||
#. name for bak
|
||||
msgid "Bashkir"
|
||||
msgstr "baskír"
|
||||
msgstr "Baskír"
|
||||
|
||||
#. name for bal
|
||||
msgid "Baluchi"
|
||||
@ -2115,7 +2110,7 @@ msgstr ""
|
||||
|
||||
#. name for bar
|
||||
msgid "Bavarian"
|
||||
msgstr ""
|
||||
msgstr "Bajor"
|
||||
|
||||
#. name for bas
|
||||
msgid "Basa (Cameroon)"
|
||||
@ -2497,10 +2492,9 @@ msgstr "beja"
|
||||
msgid "Bebeli"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:286
|
||||
#. name for bel
|
||||
msgid "Belarusian"
|
||||
msgstr "belorusz"
|
||||
msgstr "Belarusz"
|
||||
|
||||
#. name for bem
|
||||
msgid "Bemba (Zambia)"
|
||||
@ -2508,7 +2502,7 @@ msgstr ""
|
||||
|
||||
#. name for ben
|
||||
msgid "Bengali"
|
||||
msgstr "bengáli"
|
||||
msgstr "Bengáli"
|
||||
|
||||
#. name for beo
|
||||
msgid "Beami"
|
||||
@ -3510,10 +3504,9 @@ msgstr ""
|
||||
msgid "Borôro"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:309
|
||||
#. name for bos
|
||||
msgid "Bosnian"
|
||||
msgstr "bosnyák"
|
||||
msgstr "Bosnyák"
|
||||
|
||||
#. name for bot
|
||||
msgid "Bongo"
|
||||
@ -3685,7 +3678,7 @@ msgstr ""
|
||||
|
||||
#. name for bqn
|
||||
msgid "Bulgarian Sign Language"
|
||||
msgstr ""
|
||||
msgstr "Bolgár jelnyelv"
|
||||
|
||||
#. name for bqo
|
||||
msgid "Balo"
|
||||
@ -4078,10 +4071,9 @@ msgstr ""
|
||||
msgid "Bugawac"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:285
|
||||
#. name for bul
|
||||
msgid "Bulgarian"
|
||||
msgstr "bolgár"
|
||||
msgstr "Bolgár"
|
||||
|
||||
#. name for bum
|
||||
msgid "Bulu (Cameroon)"
|
||||
@ -7445,10 +7437,9 @@ msgstr ""
|
||||
msgid "Semimi"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:284
|
||||
#. name for eus
|
||||
msgid "Basque"
|
||||
msgstr "baszk"
|
||||
msgstr "Baszk"
|
||||
|
||||
#. name for eve
|
||||
msgid "Even"
|
||||
@ -7534,10 +7525,9 @@ msgstr ""
|
||||
msgid "Fang (Equatorial Guinea)"
|
||||
msgstr ""
|
||||
|
||||
# src/trans.h:294
|
||||
#. name for fao
|
||||
msgid "Faroese"
|
||||
msgstr "feröi"
|
||||
msgstr "Feröeri"
|
||||
|
||||
#. name for fap
|
||||
msgid "Palor"
|
||||
@ -29414,7 +29404,7 @@ msgstr ""
|
||||
|
||||
#. name for xzp
|
||||
msgid "Zapotec; Ancient"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Ősi"
|
||||
|
||||
#. name for yaa
|
||||
msgid "Yaminahua"
|
||||
@ -30326,27 +30316,27 @@ msgstr ""
|
||||
|
||||
#. name for zaa
|
||||
msgid "Zapotec; Sierra de Juárez"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Sierra de Juárezi"
|
||||
|
||||
#. name for zab
|
||||
msgid "Zapotec; San Juan Guelavía"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; San Juan Guelavíai"
|
||||
|
||||
#. name for zac
|
||||
msgid "Zapotec; Ocotlán"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Ocotláni"
|
||||
|
||||
#. name for zad
|
||||
msgid "Zapotec; Cajonos"
|
||||
msgstr "zapoték; Cajonos"
|
||||
msgstr "Zapoték; Cajonesi"
|
||||
|
||||
#. name for zae
|
||||
msgid "Zapotec; Yareni"
|
||||
msgstr "zapoték; Yareni"
|
||||
msgstr "Zapoték; Yareni"
|
||||
|
||||
#. name for zaf
|
||||
msgid "Zapotec; Ayoquesco"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Ayoquescoi"
|
||||
|
||||
#. name for zag
|
||||
msgid "Zaghawa"
|
||||
@ -30358,7 +30348,7 @@ msgstr "zangval"
|
||||
|
||||
#. name for zai
|
||||
msgid "Zapotec; Isthmus"
|
||||
msgstr "zapoték; Isthmus"
|
||||
msgstr "Zapoték; Isthmusi"
|
||||
|
||||
#. name for zaj
|
||||
msgid "Zaramo"
|
||||
@ -30374,31 +30364,31 @@ msgstr "zozu"
|
||||
|
||||
#. name for zam
|
||||
msgid "Zapotec; Miahuatlán"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Miahuatláni"
|
||||
|
||||
#. name for zao
|
||||
msgid "Zapotec; Ozolotepec"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Ozolotepeci"
|
||||
|
||||
#. name for zap
|
||||
msgid "Zapotec"
|
||||
msgstr "zapoték"
|
||||
msgstr "Zapoték"
|
||||
|
||||
#. name for zaq
|
||||
msgid "Zapotec; Aloápam"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Aloápami"
|
||||
|
||||
#. name for zar
|
||||
msgid "Zapotec; Rincón"
|
||||
msgstr "zapoték; Rincón"
|
||||
msgstr "Zapoték; Rincóni"
|
||||
|
||||
#. name for zas
|
||||
msgid "Zapotec; Santo Domingo Albarradas"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Santo Domingo Albarradasi"
|
||||
|
||||
#. name for zat
|
||||
msgid "Zapotec; Tabaa"
|
||||
msgstr "zapoték; Tabaa"
|
||||
msgstr "Zapoték; Tabaa-i"
|
||||
|
||||
# src/trans.h:193
|
||||
#. name for zau
|
||||
@ -30407,15 +30397,15 @@ msgstr "zangskari"
|
||||
|
||||
#. name for zav
|
||||
msgid "Zapotec; Yatzachi"
|
||||
msgstr ""
|
||||
msgstr "Zapoték; Yatzachi-i"
|
||||
|
||||
#. name for zaw
|
||||
msgid "Zapotec; Mitla"
|
||||
msgstr "zapoték; Mitla"
|
||||
msgstr "Zapoték; Mitlai"
|
||||
|
||||
#. name for zax
|
||||
msgid "Zapotec; Xadani"
|
||||
msgstr "zapoték; Xadani"
|
||||
msgstr "Zapoték; Xadani-i"
|
||||
|
||||
#. name for zay
|
||||
msgid "Zayse-Zergulla"
|
||||
@ -30991,7 +30981,7 @@ msgstr "tokano"
|
||||
|
||||
#. name for zul
|
||||
msgid "Zulu"
|
||||
msgstr "zulu"
|
||||
msgstr "Zulu"
|
||||
|
||||
# src/trans.h:316
|
||||
#. name for zum
|
||||
|
@ -10,14 +10,14 @@ msgstr ""
|
||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||
"devel@lists.alioth.debian.org>\n"
|
||||
"POT-Creation-Date: 2011-09-27 14:31+0000\n"
|
||||
"PO-Revision-Date: 2011-09-27 18:36+0000\n"
|
||||
"Last-Translator: Kovid Goyal <Unknown>\n"
|
||||
"PO-Revision-Date: 2011-10-25 19:06+0000\n"
|
||||
"Last-Translator: zeugma <Unknown>\n"
|
||||
"Language-Team: Turkish <gnome-turk@gnome.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2011-09-28 05:12+0000\n"
|
||||
"X-Generator: Launchpad (build 14049)\n"
|
||||
"X-Launchpad-Export-Date: 2011-10-26 05:13+0000\n"
|
||||
"X-Generator: Launchpad (build 14189)\n"
|
||||
"Language: tr\n"
|
||||
|
||||
#. name for aaa
|
||||
@ -54,7 +54,7 @@ msgstr ""
|
||||
|
||||
#. name for aai
|
||||
msgid "Arifama-Miniafia"
|
||||
msgstr ""
|
||||
msgstr "Arifama-Miniafia"
|
||||
|
||||
#. name for aak
|
||||
msgid "Ankave"
|
||||
@ -122,7 +122,7 @@ msgstr "Bankon"
|
||||
|
||||
#. name for abc
|
||||
msgid "Ayta; Ambala"
|
||||
msgstr ""
|
||||
msgstr "Ayta; Ambala"
|
||||
|
||||
#. name for abd
|
||||
msgid "Manide"
|
||||
@ -130,11 +130,11 @@ msgstr "Manide"
|
||||
|
||||
#. name for abe
|
||||
msgid "Abnaki; Western"
|
||||
msgstr ""
|
||||
msgstr "Abnaki; Western"
|
||||
|
||||
#. name for abf
|
||||
msgid "Abai Sungai"
|
||||
msgstr ""
|
||||
msgstr "Abai Sungai"
|
||||
|
||||
#. name for abg
|
||||
msgid "Abaga"
|
||||
@ -146,7 +146,7 @@ msgstr "Arapça; Tacikçe"
|
||||
|
||||
#. name for abi
|
||||
msgid "Abidji"
|
||||
msgstr ""
|
||||
msgstr "Abidji"
|
||||
|
||||
#. name for abj
|
||||
msgid "Aka-Bea"
|
||||
@ -158,7 +158,7 @@ msgstr "Abhazca"
|
||||
|
||||
#. name for abl
|
||||
msgid "Lampung Nyo"
|
||||
msgstr ""
|
||||
msgstr "Lampung Nyo"
|
||||
|
||||
#. name for abm
|
||||
msgid "Abanyom"
|
||||
@ -282,7 +282,7 @@ msgstr "Achterhoeks"
|
||||
|
||||
#. name for acu
|
||||
msgid "Achuar-Shiwiar"
|
||||
msgstr ""
|
||||
msgstr "Achuar-Shiwiar"
|
||||
|
||||
#. name for acv
|
||||
msgid "Achumawi"
|
||||
|
@ -206,7 +206,7 @@ class Resources(Command):
|
||||
function_dict = {}
|
||||
import inspect
|
||||
from calibre.utils.formatter_functions import formatter_functions
|
||||
for obj in formatter_functions.get_builtins().values():
|
||||
for obj in formatter_functions().get_builtins().values():
|
||||
eval_func = inspect.getmembers(obj,
|
||||
lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
|
||||
try:
|
||||
|
@ -278,6 +278,8 @@ def get_proxies(debug=True):
|
||||
continue
|
||||
if proxy.startswith(key+'://'):
|
||||
proxy = proxy[len(key)+3:]
|
||||
if key == 'https' and proxy.startswith('http://'):
|
||||
proxy = proxy[7:]
|
||||
if proxy.endswith('/'):
|
||||
proxy = proxy[:-1]
|
||||
if len(proxy) > 4:
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (0, 8, 21)
|
||||
numeric_version = (0, 8, 24)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
|
@ -502,6 +502,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
|
||||
# }}}
|
||||
|
||||
from calibre.ebooks.comic.input import ComicInput
|
||||
from calibre.ebooks.djvu.input import DJVUInput
|
||||
from calibre.ebooks.epub.input import EPUBInput
|
||||
from calibre.ebooks.fb2.input import FB2Input
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
@ -555,7 +556,8 @@ from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
||||
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
|
||||
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||
from calibre.devices.prs505.driver import PRS505, PRST1
|
||||
from calibre.devices.prs505.driver import PRS505
|
||||
from calibre.devices.prst1.driver import PRST1
|
||||
from calibre.devices.user_defined.driver import USER_DEFINED
|
||||
from calibre.devices.android.driver import ANDROID, S60, WEBOS
|
||||
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
||||
@ -599,6 +601,7 @@ plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
|
||||
|
||||
plugins += [
|
||||
ComicInput,
|
||||
DJVUInput,
|
||||
EPUBInput,
|
||||
FB2Input,
|
||||
HTMLInput,
|
||||
@ -1143,6 +1146,16 @@ class StoreAmazonDEKindleStore(StoreBase):
|
||||
formats = ['KINDLE']
|
||||
affiliate = True
|
||||
|
||||
class StoreAmazonFRKindleStore(StoreBase):
|
||||
name = 'Amazon FR Kindle'
|
||||
author = 'Charles Haley'
|
||||
description = u'Tous les ebooks Kindle'
|
||||
actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore'
|
||||
|
||||
headquarters = 'DE'
|
||||
formats = ['KINDLE']
|
||||
affiliate = True
|
||||
|
||||
class StoreAmazonUKKindleStore(StoreBase):
|
||||
name = 'Amazon UK Kindle'
|
||||
author = 'Charles Haley'
|
||||
@ -1520,6 +1533,7 @@ plugins += [
|
||||
StoreArchiveOrgStore,
|
||||
StoreAmazonKindleStore,
|
||||
StoreAmazonDEKindleStore,
|
||||
StoreAmazonFRKindleStore,
|
||||
StoreAmazonUKKindleStore,
|
||||
StoreBaenWebScriptionStore,
|
||||
StoreBNStore,
|
||||
|
@ -4,7 +4,6 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys
|
||||
from itertools import izip
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
@ -742,7 +741,7 @@ class PocketBook900Output(OutputProfile):
|
||||
screen_size = (810, 1180)
|
||||
dpi = 150.0
|
||||
comic_screen_size = screen_size
|
||||
|
||||
|
||||
output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
|
||||
SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
|
||||
HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
|
||||
|
@ -341,7 +341,9 @@ class DB(object):
|
||||
if f['is_custom']]
|
||||
for f in fmvals:
|
||||
self.create_custom_column(f['label'], f['name'],
|
||||
f['datatype'], f['is_multiple'] is not None,
|
||||
f['datatype'],
|
||||
(f['is_multiple'] is not None and
|
||||
len(f['is_multiple']) > 0),
|
||||
f['is_editable'], f['display'])
|
||||
|
||||
defs = self.prefs.defaults
|
||||
|
@ -49,6 +49,15 @@ class ANDROID(USBMS):
|
||||
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
|
||||
0x70c6 : [0x226]
|
||||
},
|
||||
# Freescale
|
||||
0x15a2 : {
|
||||
0x0c01 : [0x226]
|
||||
},
|
||||
|
||||
# Alcatel
|
||||
0x05c6 : {
|
||||
0x9018 : [0x0226],
|
||||
},
|
||||
|
||||
# Sony Ericsson
|
||||
0xfce : {
|
||||
@ -62,12 +71,13 @@ class ANDROID(USBMS):
|
||||
0x4e11 : [0x0100, 0x226, 0x227],
|
||||
0x4e12 : [0x0100, 0x226, 0x227],
|
||||
0x4e21 : [0x0100, 0x226, 0x227],
|
||||
0xb058 : [0x0222, 0x226, 0x227]
|
||||
0xb058 : [0x0222, 0x226, 0x227],
|
||||
0x0ff9 : [0x0226],
|
||||
},
|
||||
|
||||
# Samsung
|
||||
0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
|
||||
0x681c : [0x0222, 0x0224, 0x0400],
|
||||
0x681c : [0x0222, 0x0223, 0x0224, 0x0400],
|
||||
0x6640 : [0x0100],
|
||||
0x685b : [0x0400],
|
||||
0x685e : [0x0400],
|
||||
@ -138,7 +148,8 @@ class ANDROID(USBMS):
|
||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
|
||||
'VIZIO', 'GOOGLE', 'FREESCAL']
|
||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
@ -149,7 +160,7 @@ class ANDROID(USBMS):
|
||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
||||
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
|
@ -116,6 +116,7 @@ class BOOX(HANLINV3):
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
METADATA_CACHE = '.metadata.calibre'
|
||||
DRIVEINFO = '.driveinfo.calibre'
|
||||
icon = I('devices/boox.jpg')
|
||||
|
||||
# Ordered list of supported formats
|
||||
FORMATS = ['epub', 'fb2', 'djvu', 'pdf', 'html', 'txt', 'rtf', 'mobi',
|
||||
@ -123,7 +124,7 @@ class BOOX(HANLINV3):
|
||||
|
||||
VENDOR_ID = [0x0525]
|
||||
PRODUCT_ID = [0xa4a5]
|
||||
BCD = [0x322]
|
||||
BCD = [0x322, 0x323]
|
||||
|
||||
MAIN_MEMORY_VOLUME_LABEL = 'BOOX Internal Memory'
|
||||
STORAGE_CARD_VOLUME_LABEL = 'BOOX Storage Card'
|
||||
|
@ -62,7 +62,7 @@ class DevicePlugin(Plugin):
|
||||
#: Icon for this device
|
||||
icon = I('reader.png')
|
||||
|
||||
# Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
|
||||
# Encapsulates an annotation fetched from the device
|
||||
UserAnnotation = namedtuple('Annotation','type, value')
|
||||
|
||||
#: GUI displays this as a message if not None. Useful if opening can take a
|
||||
@ -217,7 +217,7 @@ class DevicePlugin(Plugin):
|
||||
'''
|
||||
Unix version of :meth:`can_handle_windows`
|
||||
|
||||
:param device_info: Is a tupe of (vid, pid, bcd, manufacturer, product,
|
||||
:param device_info: Is a tuple of (vid, pid, bcd, manufacturer, product,
|
||||
serial number)
|
||||
|
||||
'''
|
||||
@ -464,6 +464,13 @@ class DevicePlugin(Plugin):
|
||||
'''
|
||||
pass
|
||||
|
||||
def prepare_addable_books(self, paths):
|
||||
'''
|
||||
Given a list of paths, returns another list of paths. These paths
|
||||
point to addable versions of the books.
|
||||
'''
|
||||
return paths
|
||||
|
||||
class BookList(list):
|
||||
'''
|
||||
A list of books. Each Book object must have the fields
|
||||
|
@ -13,6 +13,8 @@ import datetime, os, re, sys, json, hashlib
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
from calibre.devices.kindle.bookmark import Bookmark
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre import strftime
|
||||
|
||||
'''
|
||||
Notes on collections:
|
||||
@ -164,6 +166,121 @@ class KINDLE(USBMS):
|
||||
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
|
||||
return bookmarked_books
|
||||
|
||||
def generate_annotation_html(self, bookmark):
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
|
||||
# Returns <div class="user_annotations"> ... </div>
|
||||
last_read_location = bookmark.last_read_location
|
||||
timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
|
||||
percent_read = bookmark.percent_read
|
||||
|
||||
ka_soup = BeautifulSoup()
|
||||
dtc = 0
|
||||
divTag = Tag(ka_soup,'div')
|
||||
divTag['class'] = 'user_annotations'
|
||||
|
||||
# Add the last-read location
|
||||
spanTag = Tag(ka_soup, 'span')
|
||||
spanTag['style'] = 'font-weight:bold'
|
||||
if bookmark.book_format == 'pdf':
|
||||
spanTag.insert(0,NavigableString(
|
||||
_("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)") % \
|
||||
dict(time=strftime(u'%x', timestamp.timetuple()),
|
||||
loc=last_read_location,
|
||||
pr=percent_read)))
|
||||
else:
|
||||
spanTag.insert(0,NavigableString(
|
||||
_("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)") % \
|
||||
dict(time=strftime(u'%x', timestamp.timetuple()),
|
||||
loc=last_read_location,
|
||||
pr=percent_read)))
|
||||
|
||||
divTag.insert(dtc, spanTag)
|
||||
dtc += 1
|
||||
divTag.insert(dtc, Tag(ka_soup,'br'))
|
||||
dtc += 1
|
||||
|
||||
if bookmark.user_notes:
|
||||
user_notes = bookmark.user_notes
|
||||
annotations = []
|
||||
|
||||
# Add the annotations sorted by location
|
||||
# Italicize highlighted text
|
||||
for location in sorted(user_notes):
|
||||
if user_notes[location]['text']:
|
||||
annotations.append(
|
||||
_('<b>Location %(dl)d • %(typ)s</b><br />%(text)s<br />') % \
|
||||
dict(dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type'],
|
||||
text=(user_notes[location]['text'] if \
|
||||
user_notes[location]['type'] == 'Note' else \
|
||||
'<i>%s</i>' % user_notes[location]['text'])))
|
||||
else:
|
||||
if bookmark.book_format == 'pdf':
|
||||
annotations.append(
|
||||
_('<b>Page %(dl)d • %(typ)s</b><br />') % \
|
||||
dict(dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type']))
|
||||
else:
|
||||
annotations.append(
|
||||
_('<b>Location %(dl)d • %(typ)s</b><br />') % \
|
||||
dict(dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type']))
|
||||
|
||||
for annotation in annotations:
|
||||
divTag.insert(dtc, annotation)
|
||||
dtc += 1
|
||||
|
||||
ka_soup.insert(0,divTag)
|
||||
return ka_soup
|
||||
|
||||
|
||||
def add_annotation_to_library(self, db, db_id, annotation):
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
bm = annotation
|
||||
ignore_tags = set(['Catalog', 'Clippings'])
|
||||
|
||||
if bm.type == 'kindle_bookmark':
|
||||
mi = db.get_metadata(db_id, index_is_id=True)
|
||||
user_notes_soup = self.generate_annotation_html(bm.value)
|
||||
if mi.comments:
|
||||
a_offset = mi.comments.find('<div class="user_annotations">')
|
||||
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
|
||||
|
||||
if a_offset >= 0:
|
||||
mi.comments = mi.comments[:a_offset]
|
||||
if ad_offset >= 0:
|
||||
mi.comments = mi.comments[:ad_offset]
|
||||
if set(mi.tags).intersection(ignore_tags):
|
||||
return
|
||||
if mi.comments:
|
||||
hrTag = Tag(user_notes_soup,'hr')
|
||||
hrTag['class'] = 'annotations_divider'
|
||||
user_notes_soup.insert(0, hrTag)
|
||||
|
||||
mi.comments += unicode(user_notes_soup.prettify())
|
||||
else:
|
||||
mi.comments = unicode(user_notes_soup.prettify())
|
||||
# Update library comments
|
||||
db.set_comment(db_id, mi.comments)
|
||||
|
||||
# Add bookmark file to db_id
|
||||
db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
|
||||
bm.value.path, index_is_id=True)
|
||||
elif bm.type == 'kindle_clippings':
|
||||
# Find 'My Clippings' author=Kindle in database, or add
|
||||
last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
|
||||
mc_id = list(db.data.search_getting_ids('title:"My Clippings"', ''))
|
||||
if mc_id:
|
||||
db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'],
|
||||
index_is_id=True)
|
||||
mi = db.get_metadata(mc_id[0], index_is_id=True)
|
||||
mi.comments = last_update
|
||||
db.set_metadata(mc_id[0], mi)
|
||||
else:
|
||||
mi = MetaInformation('My Clippings', authors = ['Kindle'])
|
||||
mi.tags = ['Clippings']
|
||||
mi.comments = last_update
|
||||
db.add_books([bm.value['path']], ['txt'], [mi])
|
||||
|
||||
class KINDLE2(KINDLE):
|
||||
|
||||
|
112
src/calibre/devices/kobo/bookmark.py
Normal file
112
src/calibre/devices/kobo/bookmark.py
Normal file
@ -0,0 +1,112 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from contextlib import closing
|
||||
|
||||
import sqlite3 as sqlite
|
||||
|
||||
class Bookmark(): # {{{
|
||||
'''
|
||||
A simple class fetching bookmark data
|
||||
kobo-specific
|
||||
'''
|
||||
def __init__(self, db_path, contentid, path, id, book_format, bookmark_extension):
|
||||
self.book_format = book_format
|
||||
self.bookmark_extension = bookmark_extension
|
||||
self.book_length = 0 # Not Used
|
||||
self.id = id
|
||||
self.last_read = 0
|
||||
self.last_read_location = 0 # Not Used
|
||||
self.path = path
|
||||
self.timestamp = 0
|
||||
self.user_notes = None
|
||||
self.db_path = db_path
|
||||
self.contentid = contentid
|
||||
self.percent_read = 0
|
||||
self.get_bookmark_data()
|
||||
self.get_book_length() # Not Used
|
||||
|
||||
def get_bookmark_data(self):
|
||||
''' Return the timestamp and last_read_location '''
|
||||
|
||||
user_notes = {}
|
||||
self.timestamp = os.path.getmtime(self.path)
|
||||
with closing(sqlite.connect(self.db_path)) as connection:
|
||||
# return bytestrings if the content cannot the decoded as unicode
|
||||
connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
|
||||
|
||||
cursor = connection.cursor()
|
||||
t = (self.contentid,)
|
||||
|
||||
cursor.execute('select bm.bookmarkid, bm.contentid, bm.volumeid, '
|
||||
'bm.text, bm.annotation, bm.ChapterProgress, '
|
||||
'bm.StartContainerChildIndex, bm.StartOffset, c.BookTitle, '
|
||||
'c.TITLE, c.volumeIndex, c.___NumPages '
|
||||
'from Bookmark bm inner join Content c on '
|
||||
'bm.contentid = c.contentid and '
|
||||
'bm.volumeid = ? order by bm.volumeid, bm.chapterprogress', t)
|
||||
|
||||
previous_chapter = 0
|
||||
bm_count = 0
|
||||
for row in cursor:
|
||||
current_chapter = row[10]
|
||||
if previous_chapter == current_chapter:
|
||||
bm_count = bm_count + 1
|
||||
else:
|
||||
bm_count = 0
|
||||
|
||||
text = row[3]
|
||||
annotation = row[4]
|
||||
|
||||
# A dog ear (bent upper right corner) is a bookmark
|
||||
if row[6] == row[7] == 0: # StartContainerChildIndex = StartOffset = 0
|
||||
e_type = 'Bookmark'
|
||||
text = row[9]
|
||||
# highlight is text with no annotation
|
||||
elif text is not None and (annotation is None or annotation == ""):
|
||||
e_type = 'Highlight'
|
||||
elif text and annotation:
|
||||
e_type = 'Annotation'
|
||||
else:
|
||||
e_type = 'Unknown annotation type'
|
||||
|
||||
note_id = row[10] + bm_count
|
||||
chapter_title = row[9]
|
||||
# book_title = row[8]
|
||||
chapter_progress = min(round(float(100*row[5]),2),100)
|
||||
user_notes[note_id] = dict(id=self.id,
|
||||
displayed_location=note_id,
|
||||
type=e_type,
|
||||
text=text,
|
||||
annotation=annotation,
|
||||
chapter=row[10],
|
||||
chapter_title=chapter_title,
|
||||
chapter_progress=chapter_progress)
|
||||
previous_chapter = row[10]
|
||||
# debug_print("e_type:" , e_type, '\t', 'loc: ', note_id, 'text: ', text,
|
||||
# 'annotation: ', annotation, 'chapter_title: ', chapter_title,
|
||||
# 'chapter_progress: ', chapter_progress, 'date: ')
|
||||
|
||||
cursor.execute('select datelastread, ___PercentRead from content '
|
||||
'where bookid is Null and '
|
||||
'contentid = ?', t)
|
||||
for row in cursor:
|
||||
self.last_read = row[0]
|
||||
self.percent_read = row[1]
|
||||
# print row[1]
|
||||
cursor.close()
|
||||
|
||||
# self.last_read_location = self.last_read - self.pdf_page_offset
|
||||
self.user_notes = user_notes
|
||||
|
||||
|
||||
def get_book_length(self):
|
||||
#TL self.book_length = 0
|
||||
#TL self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
|
||||
pass
|
||||
|
||||
# }}}
|
@ -2,20 +2,22 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__copyright__ = '2010, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
import os, time, calendar
|
||||
import sqlite3 as sqlite
|
||||
from contextlib import closing
|
||||
from calibre.devices.usbms.books import BookList
|
||||
from calibre.devices.kobo.books import Book
|
||||
from calibre.devices.kobo.books import ImageWrapper
|
||||
from calibre.devices.kobo.bookmark import Bookmark
|
||||
from calibre.devices.mime import mime_type_ext
|
||||
from calibre.devices.usbms.driver import USBMS, debug_print
|
||||
from calibre import prints
|
||||
from calibre.devices.usbms.books import CollectionsBookList
|
||||
from calibre.utils.magick.draw import save_cover_data_to
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class KOBO(USBMS):
|
||||
|
||||
@ -23,7 +25,7 @@ class KOBO(USBMS):
|
||||
gui_name = 'Kobo Reader'
|
||||
description = _('Communicate with the Kobo Reader')
|
||||
author = 'Timothy Legge'
|
||||
version = (1, 0, 10)
|
||||
version = (1, 0, 11)
|
||||
|
||||
dbversion = 0
|
||||
fwversion = 0
|
||||
@ -46,6 +48,7 @@ class KOBO(USBMS):
|
||||
|
||||
EBOOK_DIR_MAIN = ''
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
SUPPORTS_ANNOTATIONS = True
|
||||
|
||||
VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo'])
|
||||
|
||||
@ -105,6 +108,7 @@ class KOBO(USBMS):
|
||||
|
||||
if self.fwversion != '1.0' and self.fwversion != '1.4':
|
||||
self.has_kepubs = True
|
||||
debug_print('Version of driver: ', self.version, 'Has kepubs:', self.has_kepubs)
|
||||
debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs)
|
||||
|
||||
self.booklist_class.rebuild_collections = self.rebuild_collections
|
||||
@ -370,7 +374,7 @@ class KOBO(USBMS):
|
||||
path_prefix = '.kobo/images/'
|
||||
path = self._main_prefix + path_prefix + ImageID
|
||||
|
||||
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed',)
|
||||
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed', ' - N3_FULL.parsed',)
|
||||
|
||||
for ending in file_endings:
|
||||
fpath = path + ending
|
||||
@ -750,9 +754,12 @@ class KOBO(USBMS):
|
||||
|
||||
blists = {}
|
||||
for i in paths:
|
||||
if booklists[i] is not None:
|
||||
#debug_print('Booklist: ', i)
|
||||
blists[i] = booklists[i]
|
||||
try:
|
||||
if booklists[i] is not None:
|
||||
#debug_print('Booklist: ', i)
|
||||
blists[i] = booklists[i]
|
||||
except IndexError:
|
||||
pass
|
||||
opts = self.settings()
|
||||
if opts.extra_customization:
|
||||
collections = [x.lower().strip() for x in
|
||||
@ -843,6 +850,7 @@ class KOBO(USBMS):
|
||||
' - N3_LIBRARY_FULL.parsed':(355,530),
|
||||
' - N3_LIBRARY_GRID.parsed':(149,233),
|
||||
' - N3_LIBRARY_LIST.parsed':(60,90),
|
||||
' - N3_FULL.parsed':(600,800),
|
||||
' - N3_SOCIAL_CURRENTREAD.parsed':(120,186)}
|
||||
|
||||
for ending, resize in file_endings.items():
|
||||
@ -865,3 +873,216 @@ class KOBO(USBMS):
|
||||
else:
|
||||
debug_print("ImageID could not be retreived from the database")
|
||||
|
||||
def prepare_addable_books(self, paths):
|
||||
'''
|
||||
The Kobo supports an encrypted epub refered to as a kepub
|
||||
Unfortunately Kobo decided to put the files on the device
|
||||
with no file extension. I just hope that decision causes
|
||||
them as much grief as it does me :-)
|
||||
|
||||
This has to make a temporary copy of the book files with a
|
||||
epub extension to allow Calibre's normal processing to
|
||||
deal with the file appropriately
|
||||
'''
|
||||
for idx, path in enumerate(paths):
|
||||
if path.find('kepub') >= 0:
|
||||
with closing(open(path)) as r:
|
||||
tf = PersistentTemporaryFile(suffix='.epub')
|
||||
tf.write(r.read())
|
||||
paths[idx] = tf.name
|
||||
return paths
|
||||
|
||||
def create_annotations_path(self, mdata, device_path=None):
|
||||
if device_path:
|
||||
return device_path
|
||||
return USBMS.create_annotations_path(self, mdata)
|
||||
|
||||
def get_annotations(self, path_map):
|
||||
EPUB_FORMATS = [u'epub']
|
||||
epub_formats = set(EPUB_FORMATS)
|
||||
|
||||
def get_storage():
|
||||
storage = []
|
||||
if self._main_prefix:
|
||||
storage.append(os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN))
|
||||
if self._card_a_prefix:
|
||||
storage.append(os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A))
|
||||
if self._card_b_prefix:
|
||||
storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
|
||||
return storage
|
||||
|
||||
def resolve_bookmark_paths(storage, path_map):
|
||||
pop_list = []
|
||||
book_ext = {}
|
||||
for id in path_map:
|
||||
file_fmts = set()
|
||||
for fmt in path_map[id]['fmts']:
|
||||
file_fmts.add(fmt)
|
||||
bookmark_extension = None
|
||||
if file_fmts.intersection(epub_formats):
|
||||
book_extension = list(file_fmts.intersection(epub_formats))[0]
|
||||
bookmark_extension = 'epub'
|
||||
|
||||
if bookmark_extension:
|
||||
for vol in storage:
|
||||
bkmk_path = path_map[id]['path']
|
||||
bkmk_path = bkmk_path
|
||||
if os.path.exists(bkmk_path):
|
||||
path_map[id] = bkmk_path
|
||||
book_ext[id] = book_extension
|
||||
break
|
||||
else:
|
||||
pop_list.append(id)
|
||||
else:
|
||||
pop_list.append(id)
|
||||
|
||||
# Remove non-existent bookmark templates
|
||||
for id in pop_list:
|
||||
path_map.pop(id)
|
||||
return path_map, book_ext
|
||||
|
||||
storage = get_storage()
|
||||
path_map, book_ext = resolve_bookmark_paths(storage, path_map)
|
||||
|
||||
bookmarked_books = {}
|
||||
for id in path_map:
|
||||
extension = os.path.splitext(path_map[id])[1]
|
||||
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(path_map[id])
|
||||
ContentID = self.contentid_from_path(path_map[id], ContentType)
|
||||
|
||||
bookmark_ext = extension
|
||||
|
||||
db_path = self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')
|
||||
myBookmark = Bookmark(db_path, ContentID, path_map[id], id, book_ext[id], bookmark_ext)
|
||||
bookmarked_books[id] = self.UserAnnotation(type='kobo_bookmark', value=myBookmark)
|
||||
|
||||
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
|
||||
return bookmarked_books
|
||||
|
||||
def generate_annotation_html(self, bookmark):
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
|
||||
# Returns <div class="user_annotations"> ... </div>
|
||||
#last_read_location = bookmark.last_read_location
|
||||
#timestamp = bookmark.timestamp
|
||||
percent_read = bookmark.percent_read
|
||||
debug_print("Date: ", bookmark.last_read)
|
||||
if bookmark.last_read is not None:
|
||||
try:
|
||||
last_read = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(calendar.timegm(time.strptime(bookmark.last_read, "%Y-%m-%dT%H:%M:%S"))))
|
||||
except:
|
||||
last_read = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(calendar.timegm(time.strptime(bookmark.last_read, "%Y-%m-%dT%H:%M:%S.%f"))))
|
||||
else:
|
||||
#self.datetime = time.gmtime()
|
||||
last_read = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
|
||||
|
||||
# debug_print("Percent read: ", percent_read)
|
||||
ka_soup = BeautifulSoup()
|
||||
dtc = 0
|
||||
divTag = Tag(ka_soup,'div')
|
||||
divTag['class'] = 'user_annotations'
|
||||
|
||||
# Add the last-read location
|
||||
spanTag = Tag(ka_soup, 'span')
|
||||
spanTag['style'] = 'font-weight:normal'
|
||||
if bookmark.book_format == 'epub':
|
||||
spanTag.insert(0,NavigableString(
|
||||
_("<hr /><b>Book Last Read:</b> %(time)s<br /><b>Percentage Read:</b> %(pr)d%%<hr />") % \
|
||||
dict(time=last_read,
|
||||
#loc=last_read_location,
|
||||
pr=percent_read)))
|
||||
else:
|
||||
spanTag.insert(0,NavigableString(
|
||||
_("<hr /><b>Book Last Read:</b> %(time)s<br /><b>Percentage Read:</b> %(pr)d%%<hr />") % \
|
||||
dict(time=last_read,
|
||||
#loc=last_read_location,
|
||||
pr=percent_read)))
|
||||
|
||||
divTag.insert(dtc, spanTag)
|
||||
dtc += 1
|
||||
divTag.insert(dtc, Tag(ka_soup,'br'))
|
||||
dtc += 1
|
||||
|
||||
if bookmark.user_notes:
|
||||
user_notes = bookmark.user_notes
|
||||
annotations = []
|
||||
|
||||
# Add the annotations sorted by location
|
||||
for location in sorted(user_notes):
|
||||
if user_notes[location]['type'] == 'Bookmark':
|
||||
annotations.append(
|
||||
_('<b>Chapter %(chapter)d:</b> %(chapter_title)s<br /><b>%(typ)s</b><br /><b>Chapter Progress:</b> %(chapter_progress)s%%<br />%(annotation)s<br /><hr />') % \
|
||||
dict(chapter=user_notes[location]['chapter'],
|
||||
dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type'],
|
||||
chapter_title=user_notes[location]['chapter_title'],
|
||||
chapter_progress=user_notes[location]['chapter_progress'],
|
||||
annotation=user_notes[location]['annotation'] if user_notes[location]['annotation'] is not None else ""))
|
||||
elif user_notes[location]['type'] == 'Highlight':
|
||||
annotations.append(
|
||||
_('<b>Chapter %(chapter)d:</b> %(chapter_title)s<br /><b>%(typ)s</b><br /><b>Chapter Progress:</b> %(chapter_progress)s%%<br /><b>Highlight:</b> %(text)s<br /><hr />') % \
|
||||
dict(chapter=user_notes[location]['chapter'],
|
||||
dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type'],
|
||||
chapter_title=user_notes[location]['chapter_title'],
|
||||
chapter_progress=user_notes[location]['chapter_progress'],
|
||||
text=user_notes[location]['text']))
|
||||
elif user_notes[location]['type'] == 'Annotation':
|
||||
annotations.append(
|
||||
_('<b>Chapter %(chapter)d:</b> %(chapter_title)s<br /><b>%(typ)s</b><br /><b>Chapter Progress:</b> %(chapter_progress)s%%<br /><b>Highlight:</b> %(text)s<br /><b>Notes:</b> %(annotation)s<br /><hr />') % \
|
||||
dict(chapter=user_notes[location]['chapter'],
|
||||
dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type'],
|
||||
chapter_title=user_notes[location]['chapter_title'],
|
||||
chapter_progress=user_notes[location]['chapter_progress'],
|
||||
text=user_notes[location]['text'],
|
||||
annotation=user_notes[location]['annotation']))
|
||||
else:
|
||||
annotations.append(
|
||||
_('<b>Chapter %(chapter)d:</b> %(chapter_title)s<br /><b>%(typ)s</b><br /><b>Chapter Progress:</b> %(chapter_progress)s%%<br /><b>Highlight:</b> %(text)s<br /><b>Notes:</b> %(annotation)s<br /><hr />') % \
|
||||
dict(chapter=user_notes[location]['chapter'],
|
||||
dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type'],
|
||||
chapter_title=user_notes[location]['chapter_title'],
|
||||
chapter_progress=user_notes[location]['chapter_progress'],
|
||||
text=user_notes[location]['text'], \
|
||||
annotation=user_notes[location]['annotation']))
|
||||
|
||||
for annotation in annotations:
|
||||
divTag.insert(dtc, annotation)
|
||||
dtc += 1
|
||||
|
||||
ka_soup.insert(0,divTag)
|
||||
return ka_soup
|
||||
|
||||
def add_annotation_to_library(self, db, db_id, annotation):
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
bm = annotation
|
||||
ignore_tags = set(['Catalog', 'Clippings'])
|
||||
|
||||
if bm.type == 'kobo_bookmark':
|
||||
mi = db.get_metadata(db_id, index_is_id=True)
|
||||
user_notes_soup = self.generate_annotation_html(bm.value)
|
||||
if mi.comments:
|
||||
a_offset = mi.comments.find('<div class="user_annotations">')
|
||||
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
|
||||
|
||||
if a_offset >= 0:
|
||||
mi.comments = mi.comments[:a_offset]
|
||||
if ad_offset >= 0:
|
||||
mi.comments = mi.comments[:ad_offset]
|
||||
if set(mi.tags).intersection(ignore_tags):
|
||||
return
|
||||
if mi.comments:
|
||||
hrTag = Tag(user_notes_soup,'hr')
|
||||
hrTag['class'] = 'annotations_divider'
|
||||
user_notes_soup.insert(0, hrTag)
|
||||
|
||||
mi.comments += unicode(user_notes_soup.prettify())
|
||||
else:
|
||||
mi.comments = unicode(user_notes_soup.prettify())
|
||||
# Update library comments
|
||||
db.set_comment(db_id, mi.comments)
|
||||
|
||||
# Add bookmark file to db_id
|
||||
db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
|
||||
bm.value.path, index_is_id=True)
|
||||
|
@ -84,7 +84,7 @@ class PDNOVEL(USBMS):
|
||||
FORMATS = ['epub', 'pdf']
|
||||
|
||||
VENDOR_ID = [0x18d1]
|
||||
PRODUCT_ID = [0xb004]
|
||||
PRODUCT_ID = [0xb004, 0xa004]
|
||||
BCD = [0x224]
|
||||
|
||||
VENDOR_NAME = 'ANDROID'
|
||||
@ -224,7 +224,7 @@ class TREKSTOR(USBMS):
|
||||
FORMATS = ['epub', 'txt', 'pdf']
|
||||
|
||||
VENDOR_ID = [0x1e68]
|
||||
PRODUCT_ID = [0x0041, 0x0042,
|
||||
PRODUCT_ID = [0x0041, 0x0042, 0x0052,
|
||||
0x003e # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091
|
||||
]
|
||||
BCD = [0x0002]
|
||||
|
@ -207,8 +207,11 @@ class PRS505(USBMS):
|
||||
c = self.initialize_XML_cache()
|
||||
blists = {}
|
||||
for i in c.paths:
|
||||
if booklists[i] is not None:
|
||||
blists[i] = booklists[i]
|
||||
try:
|
||||
if booklists[i] is not None:
|
||||
blists[i] = booklists[i]
|
||||
except IndexError:
|
||||
pass
|
||||
opts = self.settings()
|
||||
if opts.extra_customization:
|
||||
collections = [x.strip() for x in
|
||||
@ -299,34 +302,3 @@ class PRS505(USBMS):
|
||||
f.write(metadata.thumbnail[-1])
|
||||
debug_print('Cover uploaded to: %r'%cpath)
|
||||
|
||||
class PRST1(USBMS):
|
||||
name = 'SONY PRST1 and newer Device Interface'
|
||||
gui_name = 'SONY Reader'
|
||||
description = _('Communicate with Sony PRST1 and newer eBook readers')
|
||||
author = 'Kovid Goyal'
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
|
||||
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
|
||||
VENDOR_ID = [0x054c] #: SONY Vendor Id
|
||||
PRODUCT_ID = [0x05c2]
|
||||
BCD = [0x226]
|
||||
|
||||
VENDOR_NAME = 'SONY'
|
||||
WINDOWS_MAIN_MEM = re.compile(
|
||||
r'(PRS-T1&)'
|
||||
)
|
||||
|
||||
THUMBNAIL_HEIGHT = 217
|
||||
SCAN_FROM_ROOT = True
|
||||
EBOOK_DIR_MAIN = __appname__
|
||||
|
||||
|
||||
def windows_filter_pnp_id(self, pnp_id):
|
||||
return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id
|
||||
|
||||
def get_carda_ebook_dir(self, for_upload=False):
|
||||
if for_upload:
|
||||
return __appname__
|
||||
return self.EBOOK_DIR_CARD_A
|
||||
|
||||
|
||||
|
7
src/calibre/devices/prst1/__init__.py
Normal file
7
src/calibre/devices/prst1/__init__.py
Normal file
@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
575
src/calibre/devices/prst1/driver.py
Normal file
575
src/calibre/devices/prst1/driver.py
Normal file
@ -0,0 +1,575 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Device driver for the SONY T1 devices
|
||||
'''
|
||||
|
||||
import os, time, re
|
||||
import sqlite3 as sqlite
|
||||
from contextlib import closing
|
||||
from datetime import date
|
||||
|
||||
from calibre.devices.usbms.driver import USBMS, debug_print
|
||||
from calibre.devices.usbms.device import USBDevice
|
||||
from calibre.devices.usbms.books import CollectionsBookList
|
||||
from calibre.devices.usbms.books import BookList
|
||||
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
|
||||
from calibre.constants import islinux
|
||||
|
||||
DBPATH = 'Sony_Reader/database/books.db'
|
||||
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
|
||||
|
||||
class ImageWrapper(object):
|
||||
def __init__(self, image_path):
|
||||
self.image_path = image_path
|
||||
|
||||
class PRST1(USBMS):
|
||||
name = 'SONY PRST1 and newer Device Interface'
|
||||
gui_name = 'SONY Reader'
|
||||
description = _('Communicate with the PRST1 and newer SONY eBook readers')
|
||||
author = 'Kovid Goyal'
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
path_sep = '/'
|
||||
booklist_class = CollectionsBookList
|
||||
|
||||
FORMATS = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are
|
||||
# used in japan
|
||||
CAN_SET_METADATA = ['collections']
|
||||
CAN_DO_DEVICE_DB_PLUGBOARD = True
|
||||
|
||||
VENDOR_ID = [0x054c] #: SONY Vendor Id
|
||||
PRODUCT_ID = [0x05c2]
|
||||
BCD = [0x226]
|
||||
|
||||
VENDOR_NAME = 'SONY'
|
||||
WINDOWS_MAIN_MEM = re.compile(
|
||||
r'(PRS-T1&)'
|
||||
)
|
||||
WINDOWS_CARD_A_MEM = re.compile(
|
||||
r'(PRS-T1__SD&)'
|
||||
)
|
||||
MAIN_MEMORY_VOLUME_LABEL = 'SONY Reader Main Memory'
|
||||
STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'
|
||||
|
||||
THUMBNAIL_HEIGHT = 144
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
SUPPORTS_USE_AUTHOR_SORT = True
|
||||
MUST_READ_METADATA = True
|
||||
EBOOK_DIR_MAIN = 'Sony_Reader/media/books'
|
||||
|
||||
EXTRA_CUSTOMIZATION_MESSAGE = [
|
||||
_('Comma separated list of metadata fields '
|
||||
'to turn into collections on the device. Possibilities include: ')+\
|
||||
'series, tags, authors',
|
||||
_('Upload separate cover thumbnails for books') +
|
||||
':::'+_('Normally, the SONY readers get the cover image from the'
|
||||
' ebook file itself. With this option, calibre will send a '
|
||||
'separate cover image to the reader, useful if you are '
|
||||
'sending DRMed books in which you cannot change the cover.'),
|
||||
_('Refresh separate covers when using automatic management') +
|
||||
':::' +
|
||||
_('Set this option to have separate book covers uploaded '
|
||||
'every time you connect your device. Unset this option if '
|
||||
'you have so many books on the reader that performance is '
|
||||
'unacceptable.'),
|
||||
_('Preserve cover aspect ratio when building thumbnails') +
|
||||
':::' +
|
||||
_('Set this option if you want the cover thumbnails to have '
|
||||
'the same aspect ratio (width to height) as the cover. '
|
||||
'Unset it if you want the thumbnail to be the maximum size, '
|
||||
'ignoring aspect ratio.'),
|
||||
_('Use SONY Author Format (First Author Only)') +
|
||||
':::' +
|
||||
_('Set this option if you want the author on the Sony to '
|
||||
'appear the same way the T1 sets it. This means it will '
|
||||
'only show the first author for books with multiple authors. '
|
||||
'Leave this disabled if you use Metadata Plugboards.')
|
||||
]
|
||||
EXTRA_CUSTOMIZATION_DEFAULT = [
|
||||
', '.join(['series', 'tags']),
|
||||
True,
|
||||
False,
|
||||
True,
|
||||
False,
|
||||
]
|
||||
|
||||
OPT_COLLECTIONS = 0
|
||||
OPT_UPLOAD_COVERS = 1
|
||||
OPT_REFRESH_COVERS = 2
|
||||
OPT_PRESERVE_ASPECT_RATIO = 3
|
||||
OPT_USE_SONY_AUTHORS = 4
|
||||
|
||||
plugboards = None
|
||||
plugboard_func = None
|
||||
|
||||
def post_open_callback(self):
|
||||
# Set the thumbnail width to the theoretical max if the user has asked
|
||||
# that we do not preserve aspect ratio
|
||||
ec = self.settings().extra_customization
|
||||
if not ec[self.OPT_PRESERVE_ASPECT_RATIO]:
|
||||
self.THUMBNAIL_WIDTH = 108
|
||||
self.WANTS_UPDATED_THUMBNAILS = ec[self.OPT_REFRESH_COVERS]
|
||||
# Make sure the date offset is set to none, we'll calculate it in books.
|
||||
self.device_offset = None
|
||||
|
||||
def windows_filter_pnp_id(self, pnp_id):
|
||||
return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id
|
||||
|
||||
def get_carda_ebook_dir(self, for_upload=False):
|
||||
if for_upload:
|
||||
return self.EBOOK_DIR_MAIN
|
||||
return self.EBOOK_DIR_CARD_A
|
||||
|
||||
def get_main_ebook_dir(self, for_upload=False):
|
||||
if for_upload:
|
||||
return self.EBOOK_DIR_MAIN
|
||||
return ''
|
||||
|
||||
def can_handle(self, devinfo, debug=False):
|
||||
if islinux:
|
||||
dev = USBDevice(devinfo)
|
||||
main, carda, cardb = self.find_device_nodes(detected_device=dev)
|
||||
if main is None and carda is None and cardb is None:
|
||||
if debug:
|
||||
print ('\tPRS-T1: Appears to be in non data mode'
|
||||
' or was ejected, ignoring')
|
||||
return False
|
||||
return True
|
||||
|
||||
def books(self, oncard=None, end_session=True):
|
||||
dummy_bl = BookList(None, None, None)
|
||||
|
||||
if (
|
||||
(oncard == 'carda' and not self._card_a_prefix) or
|
||||
(oncard and oncard != 'carda')
|
||||
):
|
||||
self.report_progress(1.0, _('Getting list of books on device...'))
|
||||
return dummy_bl
|
||||
|
||||
prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix
|
||||
|
||||
# Let parent driver get the books
|
||||
self.booklist_class.rebuild_collections = self.rebuild_collections
|
||||
bl = USBMS.books(self, oncard=oncard, end_session=end_session)
|
||||
|
||||
dbpath = self.normalize_path(prefix + DBPATH)
|
||||
debug_print("SQLite DB Path: " + dbpath)
|
||||
|
||||
with closing(sqlite.connect(dbpath)) as connection:
|
||||
# Replace undecodable characters in the db instead of erroring out
|
||||
connection.text_factory = lambda x: unicode(x, "utf-8", "replace")
|
||||
|
||||
cursor = connection.cursor()
|
||||
# Query collections
|
||||
query = '''
|
||||
SELECT books._id, collection.title
|
||||
FROM collections
|
||||
LEFT OUTER JOIN books
|
||||
LEFT OUTER JOIN collection
|
||||
WHERE collections.content_id = books._id AND
|
||||
collections.collection_id = collection._id
|
||||
'''
|
||||
cursor.execute(query)
|
||||
|
||||
bl_collections = {}
|
||||
for i, row in enumerate(cursor):
|
||||
bl_collections.setdefault(row[0], [])
|
||||
bl_collections[row[0]].append(row[1])
|
||||
|
||||
# collect information on offsets, but assume any
|
||||
# offset we already calculated is correct
|
||||
if self.device_offset is None:
|
||||
query = 'SELECT file_path, modified_date FROM books'
|
||||
cursor.execute(query)
|
||||
|
||||
time_offsets = {}
|
||||
for i, row in enumerate(cursor):
|
||||
comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
|
||||
device_date = int(row[1]);
|
||||
offset = device_date - comp_date
|
||||
time_offsets.setdefault(offset, 0)
|
||||
time_offsets[offset] = time_offsets[offset] + 1
|
||||
|
||||
try:
|
||||
device_offset = max(time_offsets,key = lambda a: time_offsets.get(a))
|
||||
debug_print("Device Offset: %d ms"%device_offset)
|
||||
self.device_offset = device_offset
|
||||
except ValueError:
|
||||
debug_print("No Books To Detect Device Offset.")
|
||||
|
||||
for idx, book in enumerate(bl):
|
||||
query = 'SELECT _id, thumbnail FROM books WHERE file_path = ?'
|
||||
t = (book.lpath,)
|
||||
cursor.execute (query, t)
|
||||
|
||||
for i, row in enumerate(cursor):
|
||||
book.device_collections = bl_collections.get(row[0], None)
|
||||
thumbnail = row[1]
|
||||
if thumbnail is not None:
|
||||
thumbnail = self.normalize_path(prefix + thumbnail)
|
||||
book.thumbnail = ImageWrapper(thumbnail)
|
||||
|
||||
cursor.close()
|
||||
|
||||
return bl
|
||||
|
||||
def set_plugboards(self, plugboards, pb_func):
|
||||
self.plugboards = plugboards
|
||||
self.plugboard_func = pb_func
|
||||
|
||||
def sync_booklists(self, booklists, end_session=True):
|
||||
debug_print('PRST1: starting sync_booklists')
|
||||
|
||||
opts = self.settings()
|
||||
if opts.extra_customization:
|
||||
collections = [x.strip() for x in
|
||||
opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
|
||||
else:
|
||||
collections = []
|
||||
debug_print('PRST1: collection fields:', collections)
|
||||
|
||||
if booklists[0] is not None:
|
||||
self.update_device_database(booklists[0], collections, None)
|
||||
if booklists[1] is not None:
|
||||
self.update_device_database(booklists[1], collections, 'carda')
|
||||
|
||||
USBMS.sync_booklists(self, booklists, end_session=end_session)
|
||||
debug_print('PRST1: finished sync_booklists')
|
||||
|
||||
def update_device_database(self, booklist, collections_attributes, oncard):
|
||||
debug_print('PRST1: starting update_device_database')
|
||||
|
||||
plugboard = None
|
||||
if self.plugboard_func:
|
||||
plugboard = self.plugboard_func(self.__class__.__name__,
|
||||
'device_db', self.plugboards)
|
||||
debug_print("PRST1: Using Plugboard", plugboard)
|
||||
|
||||
prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix
|
||||
if prefix is None:
|
||||
# Reader has no sd card inserted
|
||||
return
|
||||
source_id = 1 if oncard == 'carda' else 0
|
||||
|
||||
dbpath = self.normalize_path(prefix + DBPATH)
|
||||
debug_print("SQLite DB Path: " + dbpath)
|
||||
|
||||
collections = booklist.get_collections(collections_attributes)
|
||||
|
||||
with closing(sqlite.connect(dbpath)) as connection:
|
||||
self.update_device_books(connection, booklist, source_id, plugboard)
|
||||
self.update_device_collections(connection, booklist, collections, source_id)
|
||||
|
||||
debug_print('PRST1: finished update_device_database')
|
||||
|
||||
def update_device_books(self, connection, booklist, source_id, plugboard):
|
||||
opts = self.settings()
|
||||
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
|
||||
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
|
||||
use_sony_authors = opts.extra_customization[self.OPT_USE_SONY_AUTHORS]
|
||||
|
||||
cursor = connection.cursor()
|
||||
|
||||
# Get existing books
|
||||
query = 'SELECT file_path, _id FROM books'
|
||||
cursor.execute(query)
|
||||
|
||||
db_books = {}
|
||||
for i, row in enumerate(cursor):
|
||||
lpath = row[0].replace('\\', '/')
|
||||
db_books[lpath] = row[1]
|
||||
|
||||
for book in booklist:
|
||||
# Run through plugboard if needed
|
||||
if plugboard is not None:
|
||||
newmi = book.deepcopy_metadata()
|
||||
newmi.template_to_attribute(book, plugboard)
|
||||
else:
|
||||
newmi = book
|
||||
|
||||
# Get Metadata We Want
|
||||
lpath = book.lpath
|
||||
try:
|
||||
if opts.use_author_sort:
|
||||
if newmi.author_sort:
|
||||
author = newmi.author_sort
|
||||
else:
|
||||
author = authors_to_sort_string(newmi.authors)
|
||||
else:
|
||||
if use_sony_authors:
|
||||
author = newmi.authors[0]
|
||||
else:
|
||||
author = authors_to_string(newmi.authors)
|
||||
except:
|
||||
author = _('Unknown')
|
||||
title = newmi.title or _('Unknown')
|
||||
|
||||
# Get modified date
|
||||
modified_date = os.path.getmtime(book.path) * 1000
|
||||
if self.device_offset is not None:
|
||||
modified_date = modified_date + self.device_offset
|
||||
else:
|
||||
time_offset = -time.altzone if time.daylight else -time.timezone
|
||||
modified_date = modified_date + (time_offset * 1000)
|
||||
|
||||
if lpath not in db_books:
|
||||
query = '''
|
||||
INSERT INTO books
|
||||
(title, author, source_id, added_date, modified_date,
|
||||
file_path, file_name, file_size, mime_type, corrupted,
|
||||
prevent_delete)
|
||||
values (?,?,?,?,?,?,?,?,?,0,0)
|
||||
'''
|
||||
t = (title, author, source_id, int(time.time() * 1000),
|
||||
modified_date, lpath,
|
||||
os.path.basename(lpath), book.size, book.mime)
|
||||
cursor.execute(query, t)
|
||||
book.bookId = cursor.lastrowid
|
||||
if upload_covers:
|
||||
self.upload_book_cover(connection, book, source_id)
|
||||
debug_print('Inserted New Book: ' + book.title)
|
||||
else:
|
||||
query = '''
|
||||
UPDATE books
|
||||
SET title = ?, author = ?, modified_date = ?, file_size = ?
|
||||
WHERE file_path = ?
|
||||
'''
|
||||
t = (title, author, modified_date, book.size, lpath)
|
||||
cursor.execute(query, t)
|
||||
book.bookId = db_books[lpath]
|
||||
if refresh_covers:
|
||||
self.upload_book_cover(connection, book, source_id)
|
||||
db_books[lpath] = None
|
||||
|
||||
if self.is_sony_periodical(book):
|
||||
self.periodicalize_book(connection, book)
|
||||
|
||||
for book, bookId in db_books.items():
|
||||
if bookId is not None:
|
||||
# Remove From Collections
|
||||
query = 'DELETE FROM collections WHERE content_id = ?'
|
||||
t = (bookId,)
|
||||
cursor.execute(query, t)
|
||||
# Remove from Books
|
||||
query = 'DELETE FROM books where _id = ?'
|
||||
t = (bookId,)
|
||||
cursor.execute(query, t)
|
||||
debug_print('Deleted Book:' + book)
|
||||
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
|
||||
def update_device_collections(self, connection, booklist, collections,
|
||||
source_id):
|
||||
cursor = connection.cursor()
|
||||
|
||||
if collections:
|
||||
# Get existing collections
|
||||
query = 'SELECT _id, title FROM collection'
|
||||
cursor.execute(query)
|
||||
|
||||
db_collections = {}
|
||||
for i, row in enumerate(cursor):
|
||||
db_collections[row[1]] = row[0]
|
||||
|
||||
for collection, books in collections.items():
|
||||
if collection not in db_collections:
|
||||
query = 'INSERT INTO collection (title, source_id) VALUES (?,?)'
|
||||
t = (collection, source_id)
|
||||
cursor.execute(query, t)
|
||||
db_collections[collection] = cursor.lastrowid
|
||||
debug_print('Inserted New Collection: ' + collection)
|
||||
|
||||
# Get existing books in collection
|
||||
query = '''
|
||||
SELECT books.file_path, content_id
|
||||
FROM collections
|
||||
LEFT OUTER JOIN books
|
||||
WHERE collection_id = ? AND books._id = collections.content_id
|
||||
'''
|
||||
t = (db_collections[collection],)
|
||||
cursor.execute(query, t)
|
||||
|
||||
db_books = {}
|
||||
for i, row in enumerate(cursor):
|
||||
db_books[row[0]] = row[1]
|
||||
|
||||
for idx, book in enumerate(books):
|
||||
if collection not in book.device_collections:
|
||||
book.device_collections.append(collection)
|
||||
if db_books.get(book.lpath, None) is None:
|
||||
query = '''
|
||||
INSERT INTO collections (collection_id, content_id,
|
||||
added_order) values (?,?,?)
|
||||
'''
|
||||
t = (db_collections[collection], book.bookId, idx)
|
||||
cursor.execute(query, t)
|
||||
debug_print('Inserted Book Into Collection: ' +
|
||||
book.title + ' -> ' + collection)
|
||||
else:
|
||||
query = '''
|
||||
UPDATE collections
|
||||
SET added_order = ?
|
||||
WHERE content_id = ? AND collection_id = ?
|
||||
'''
|
||||
t = (idx, book.bookId, db_collections[collection])
|
||||
cursor.execute(query, t)
|
||||
|
||||
db_books[book.lpath] = None
|
||||
|
||||
for bookPath, bookId in db_books.items():
|
||||
if bookId is not None:
|
||||
query = ('DELETE FROM collections '
|
||||
'WHERE content_id = ? AND collection_id = ? ')
|
||||
t = (bookId, db_collections[collection],)
|
||||
cursor.execute(query, t)
|
||||
debug_print('Deleted Book From Collection: ' + bookPath
|
||||
+ ' -> ' + collection)
|
||||
|
||||
db_collections[collection] = None
|
||||
|
||||
for collection, collectionId in db_collections.items():
|
||||
if collectionId is not None:
|
||||
# Remove Books from Collection
|
||||
query = ('DELETE FROM collections '
|
||||
'WHERE collection_id = ?')
|
||||
t = (collectionId,)
|
||||
cursor.execute(query, t)
|
||||
# Remove Collection
|
||||
query = ('DELETE FROM collection '
|
||||
'WHERE _id = ?')
|
||||
t = (collectionId,)
|
||||
cursor.execute(query, t)
|
||||
debug_print('Deleted Collection: ' + collection)
|
||||
|
||||
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
|
||||
def rebuild_collections(self, booklist, oncard):
|
||||
debug_print('PRST1: starting rebuild_collections')
|
||||
|
||||
opts = self.settings()
|
||||
if opts.extra_customization:
|
||||
collections = [x.strip() for x in
|
||||
opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
|
||||
else:
|
||||
collections = []
|
||||
debug_print('PRST1: collection fields:', collections)
|
||||
|
||||
self.update_device_database(booklist, collections, oncard)
|
||||
|
||||
debug_print('PRS-T1: finished rebuild_collections')
|
||||
|
||||
def upload_cover(self, path, filename, metadata, filepath):
|
||||
debug_print('PRS-T1: uploading cover')
|
||||
|
||||
if filepath.startswith(self._main_prefix):
|
||||
prefix = self._main_prefix
|
||||
source_id = 0
|
||||
else:
|
||||
prefix = self._card_a_prefix
|
||||
source_id = 1
|
||||
|
||||
metadata.lpath = filepath.partition(prefix)[2]
|
||||
metadata.lpath = metadata.lpath.replace('\\', '/')
|
||||
dbpath = self.normalize_path(prefix + DBPATH)
|
||||
debug_print("SQLite DB Path: " + dbpath)
|
||||
|
||||
with closing(sqlite.connect(dbpath)) as connection:
|
||||
cursor = connection.cursor()
|
||||
|
||||
query = 'SELECT _id FROM books WHERE file_path = ?'
|
||||
t = (metadata.lpath,)
|
||||
cursor.execute(query, t)
|
||||
|
||||
for i, row in enumerate(cursor):
|
||||
metadata.bookId = row[0]
|
||||
|
||||
cursor.close()
|
||||
|
||||
if getattr(metadata, 'bookId', None) is not None:
|
||||
debug_print('PRS-T1: refreshing cover for book being sent')
|
||||
self.upload_book_cover(connection, metadata, source_id)
|
||||
|
||||
debug_print('PRS-T1: done uploading cover')
|
||||
|
||||
def upload_book_cover(self, connection, book, source_id):
|
||||
debug_print('PRST1: Uploading/Refreshing Cover for ' + book.title)
|
||||
if not book.thumbnail or not book.thumbnail[-1]:
|
||||
return
|
||||
cursor = connection.cursor()
|
||||
|
||||
thumbnail_path = THUMBPATH%book.bookId
|
||||
|
||||
prefix = self._main_prefix if source_id is 0 else self._card_a_prefix
|
||||
thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/'))
|
||||
thumbnail_dir_path = os.path.dirname(thumbnail_file_path)
|
||||
if not os.path.exists(thumbnail_dir_path):
|
||||
os.makedirs(thumbnail_dir_path)
|
||||
|
||||
with open(thumbnail_file_path, 'wb') as f:
|
||||
f.write(book.thumbnail[-1])
|
||||
|
||||
query = 'UPDATE books SET thumbnail = ? WHERE _id = ?'
|
||||
t = (thumbnail_path, book.bookId,)
|
||||
cursor.execute(query, t)
|
||||
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
|
||||
def is_sony_periodical(self, book):
|
||||
if _('News') not in book.tags:
|
||||
return False
|
||||
if not book.lpath.lower().endswith('.epub'):
|
||||
return False
|
||||
if book.pubdate.date() < date(2010, 10, 17):
|
||||
return False
|
||||
return True
|
||||
|
||||
def periodicalize_book(self, connection, book):
|
||||
if not self.is_sony_periodical(book):
|
||||
return
|
||||
|
||||
name = None
|
||||
if '[' in book.title:
|
||||
name = book.title.split('[')[0].strip()
|
||||
if len(name) < 4:
|
||||
name = None
|
||||
if not name:
|
||||
try:
|
||||
name = [t for t in book.tags if t != _('News')][0]
|
||||
except:
|
||||
name = None
|
||||
|
||||
if not name:
|
||||
name = book.title
|
||||
|
||||
pubdate = None
|
||||
try:
|
||||
pubdate = int(time.mktime(book.pubdate.timetuple()) * 1000)
|
||||
except:
|
||||
pass
|
||||
|
||||
cursor = connection.cursor()
|
||||
|
||||
query = '''
|
||||
UPDATE books
|
||||
SET conforms_to = 'http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0',
|
||||
periodical_name = ?,
|
||||
description = ?,
|
||||
publication_date = ?
|
||||
WHERE _id = ?
|
||||
'''
|
||||
t = (name, None, pubdate, book.bookId,)
|
||||
cursor.execute(query, t)
|
||||
|
||||
connection.commit()
|
||||
cursor.close()
|
@ -483,7 +483,7 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
self._card_a_prefix = get_card_prefix('carda')
|
||||
self._card_b_prefix = get_card_prefix('cardb')
|
||||
|
||||
def find_device_nodes(self):
|
||||
def find_device_nodes(self, detected_device=None):
|
||||
|
||||
def walk(base):
|
||||
base = os.path.abspath(os.path.realpath(base))
|
||||
@ -507,8 +507,11 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
d, j = os.path.dirname, os.path.join
|
||||
usb_dir = None
|
||||
|
||||
if detected_device is None:
|
||||
detected_device = self.detected_device
|
||||
|
||||
def test(val, attr):
|
||||
q = getattr(self.detected_device, attr)
|
||||
q = getattr(detected_device, attr)
|
||||
return q == val
|
||||
|
||||
for x, isfile in walk('/sys/devices'):
|
||||
@ -596,6 +599,8 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
label = self.STORAGE_CARD2_VOLUME_LABEL
|
||||
if not label:
|
||||
label = self.STORAGE_CARD_VOLUME_LABEL + ' 2'
|
||||
if not label:
|
||||
label = 'E-book Reader (%s)'%type
|
||||
extra = 0
|
||||
while True:
|
||||
q = ' (%d)'%extra if extra else ''
|
||||
@ -1063,6 +1068,12 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
'''
|
||||
return {}
|
||||
|
||||
def add_annotation_to_library(self, db, db_id, annotation):
|
||||
'''
|
||||
Add an annotation to the calibre library
|
||||
'''
|
||||
pass
|
||||
|
||||
def create_upload_path(self, path, mdata, fname, create_dirs=True):
|
||||
path = os.path.abspath(path)
|
||||
maxlen = self.MAX_PATH_LEN
|
||||
@ -1142,3 +1153,6 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
os.makedirs(filedir)
|
||||
|
||||
return filepath
|
||||
|
||||
def create_annotations_path(self, mdata, device_path=None):
|
||||
return self.create_upload_path(os.path.abspath('/<storage>'), mdata, 'x.bookmark', create_dirs=False)
|
||||
|
@ -30,7 +30,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
|
||||
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
|
||||
'xps', 'oxps', 'azw4']
|
||||
'xps', 'oxps', 'azw4', 'book', 'zbf']
|
||||
|
||||
class HTMLRenderer(object):
|
||||
|
||||
|
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
|
||||
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
|
||||
from calibre.ebooks.chm.reader import CHMReader
|
||||
log.debug('Opening CHM file')
|
||||
rdr = CHMReader(chm_path, log, self.opts)
|
||||
rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
|
||||
log.debug('Extracting CHM to %s' % output_dir)
|
||||
rdr.extract_content(output_dir, debug_dump=debug_dump)
|
||||
self._chm_reader = rdr
|
||||
|
@ -40,14 +40,14 @@ class CHMError(Exception):
|
||||
pass
|
||||
|
||||
class CHMReader(CHMFile):
|
||||
def __init__(self, input, log, opts):
|
||||
def __init__(self, input, log, input_encoding=None):
|
||||
CHMFile.__init__(self)
|
||||
if isinstance(input, unicode):
|
||||
input = input.encode(filesystem_encoding)
|
||||
if not self.LoadCHM(input):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
self.log = log
|
||||
self.opts = opts
|
||||
self.input_encoding = input_encoding
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
@ -156,8 +156,8 @@ class CHMReader(CHMFile):
|
||||
break
|
||||
|
||||
def _reformat(self, data, htmlpath):
|
||||
if self.opts.input_encoding:
|
||||
data = data.decode(self.opts.input_encoding)
|
||||
if self.input_encoding:
|
||||
data = data.decode(self.input_encoding)
|
||||
try:
|
||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||
soup = BeautifulSoup(data)
|
||||
|
@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
|
||||
def unarchive(self, path, tdir):
|
||||
extract(path, tdir)
|
||||
files = list(walk(tdir))
|
||||
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
|
||||
for f in files]
|
||||
from calibre.customize.ui import available_input_formats
|
||||
fmts = available_input_formats()
|
||||
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
|
||||
|
12
src/calibre/ebooks/djvu/__init__.py
Normal file
12
src/calibre/ebooks/djvu/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Anthon van der Neut <anthon@mnt.org>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Used for DJVU input
|
||||
'''
|
||||
|
146
src/calibre/ebooks/djvu/djvu.py
Normal file
146
src/calibre/ebooks/djvu/djvu.py
Normal file
@ -0,0 +1,146 @@
|
||||
#! /usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'
|
||||
|
||||
# this code is based on:
|
||||
# Lizardtech DjVu Reference
|
||||
# DjVu v3
|
||||
# November 2005
|
||||
|
||||
import sys
|
||||
import struct
|
||||
from cStringIO import StringIO
|
||||
|
||||
from .djvubzzdec import BZZDecoder
|
||||
|
||||
class DjvuChunk(object):
|
||||
def __init__(self, buf, start, end, align=True, bigendian=True,
|
||||
inclheader=False, verbose=0):
|
||||
self.subtype = None
|
||||
self._subchunks = []
|
||||
self.buf = buf
|
||||
pos = start + 4
|
||||
self.type = buf[start:pos]
|
||||
self.align = align # whether to align to word (2-byte) boundaries
|
||||
self.headersize = 0 if inclheader else 8
|
||||
if bigendian:
|
||||
self.strflag = b'>'
|
||||
else:
|
||||
self.strflag = b'<'
|
||||
oldpos, pos = pos, pos+4
|
||||
self.size = struct.unpack(self.strflag+b'L', buf[oldpos:pos])[0]
|
||||
self.dataend = pos + self.size - (8 if inclheader else 0)
|
||||
if self.type == b'FORM':
|
||||
oldpos, pos = pos, pos+4
|
||||
#print oldpos, pos
|
||||
self.subtype = buf[oldpos:pos]
|
||||
#self.headersize += 4
|
||||
self.datastart = pos
|
||||
if verbose > 0:
|
||||
print ('found', self.type, self.subtype, pos, self.size)
|
||||
if self.type in b'FORM'.split():
|
||||
if verbose > 0:
|
||||
print ('processing substuff %d %d (%x)' % (pos, self.dataend,
|
||||
self.dataend))
|
||||
numchunks = 0
|
||||
while pos < self.dataend:
|
||||
x = DjvuChunk(buf, pos, start+self.size, verbose=verbose)
|
||||
numchunks += 1
|
||||
self._subchunks.append(x)
|
||||
newpos = pos + x.size + x.headersize + (1 if (x.size % 2) else 0)
|
||||
if verbose > 0:
|
||||
print ('newpos %d %d (%x, %x) %d' % (newpos, self.dataend,
|
||||
newpos, self.dataend, x.headersize))
|
||||
pos = newpos
|
||||
if verbose > 0:
|
||||
print (' end of chunk %d (%x)' % (pos, pos))
|
||||
|
||||
def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100):
|
||||
if out:
|
||||
out.write(b' ' * indent)
|
||||
out.write(b'%s%s [%d]\n' % (self.type,
|
||||
b':' + self.subtype if self.subtype else b'', self.size))
|
||||
if txtout and self.type == b'TXTz':
|
||||
inbuf = StringIO(self.buf[self.datastart: self.dataend])
|
||||
outbuf = StringIO()
|
||||
decoder = BZZDecoder(inbuf, outbuf)
|
||||
while True:
|
||||
xxres = decoder.convert(1024 * 1024)
|
||||
if not xxres:
|
||||
break
|
||||
res = outbuf.getvalue()
|
||||
l = 0
|
||||
for x in res[:3]:
|
||||
l <<= 8
|
||||
l += ord(x)
|
||||
if verbose > 0 and out:
|
||||
print >> out, l
|
||||
txtout.write(res[3:3+l])
|
||||
txtout.write(b'\n\f')
|
||||
if txtout and self.type == b'TXTa':
|
||||
res = self.buf[self.datastart: self.dataend]
|
||||
l = 0
|
||||
for x in res[:3]:
|
||||
l <<= 8
|
||||
l += ord(x)
|
||||
if verbose > 0 and out:
|
||||
print >> out, l
|
||||
txtout.write(res[3:3+l])
|
||||
txtout.write(b'\n\f')
|
||||
if indent >= maxlevel:
|
||||
return
|
||||
for schunk in self._subchunks:
|
||||
schunk.dump(verbose=verbose, indent=indent+1, out=out, txtout=txtout)
|
||||
|
||||
class DJVUFile(object):
|
||||
def __init__(self, instream, verbose=0):
|
||||
self.instream = instream
|
||||
buf = self.instream.read(4)
|
||||
assert(buf == b'AT&T')
|
||||
buf = self.instream.read()
|
||||
self.dc = DjvuChunk(buf, 0, len(buf), verbose=verbose)
|
||||
|
||||
def get_text(self, outfile=None):
|
||||
self.dc.dump(txtout=outfile)
|
||||
|
||||
def dump(self, outfile=None, maxlevel=0):
|
||||
self.dc.dump(out=outfile, maxlevel=maxlevel)
|
||||
|
||||
def main():
|
||||
from ruamel.util.program import Program
|
||||
class DJVUDecoder(Program):
|
||||
def __init__(self):
|
||||
Program.__init__(self)
|
||||
|
||||
def parser_setup(self):
|
||||
Program.parser_setup(self)
|
||||
#self._argparser.add_argument('--combine', '-c', action=CountAction, const=1, nargs=0)
|
||||
#self._argparser.add_argument('--combine', '-c', type=int, default=1)
|
||||
#self._argparser.add_argument('--segments', '-s', action='append', nargs='+')
|
||||
#self._argparser.add_argument('--force', '-f', action='store_true')
|
||||
#self._argparser.add_argument('classname')
|
||||
self._argparser.add_argument('--text', '-t', action='store_true')
|
||||
self._argparser.add_argument('--dump', type=int, default=0)
|
||||
self._argparser.add_argument('file', nargs='+')
|
||||
|
||||
def run(self):
|
||||
if self._args.verbose > 1: # can be negative with --quiet
|
||||
print (self._args.file)
|
||||
x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose)
|
||||
if self._args.text:
|
||||
print (x.get_text(sys.stdout))
|
||||
if self._args.dump:
|
||||
x.dump(sys.stdout, maxlevel=self._args.dump)
|
||||
return 0
|
||||
|
||||
tt = DJVUDecoder()
|
||||
res = tt.result
|
||||
if res != 0:
|
||||
print (res)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
746
src/calibre/ebooks/djvu/djvubzzdec.py
Normal file
746
src/calibre/ebooks/djvu/djvubzzdec.py
Normal file
@ -0,0 +1,746 @@
|
||||
#! /usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'
|
||||
#__docformat__ = 'restructuredtext en'
|
||||
|
||||
# Copyright (C) 2011 Anthon van der Neut, Ruamel bvba
|
||||
# Adapted from Leon Bottou's djvulibre C++ code,
|
||||
# ( ZPCodec.{cpp,h} and BSByteStream.{cpp,h} )
|
||||
# that code was first converted to C removing any dependencies on the DJVU libre
|
||||
# framework for ByteStream, making it into a ctypes callable shared object
|
||||
# then to python, and remade into a class
|
||||
original_copyright_notice = '''
|
||||
//C- -------------------------------------------------------------------
|
||||
//C- DjVuLibre-3.5
|
||||
//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
|
||||
//C- Copyright (c) 2001 AT&T
|
||||
//C-
|
||||
//C- This software is subject to, and may be distributed under, the
|
||||
//C- GNU General Public License, either Version 2 of the license,
|
||||
//C- or (at your option) any later version. The license should have
|
||||
//C- accompanied the software or you may obtain a copy of the license
|
||||
//C- from the Free Software Foundation at http://www.fsf.org .
|
||||
//C-
|
||||
//C- This program is distributed in the hope that it will be useful,
|
||||
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//C- GNU General Public License for more details.
|
||||
//C-
|
||||
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
|
||||
//C- Lizardtech Software. Lizardtech Software has authorized us to
|
||||
//C- replace the original DjVu(r) Reference Library notice by the following
|
||||
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
|
||||
//C-
|
||||
//C- ------------------------------------------------------------------
|
||||
//C- | DjVu (r) Reference Library (v. 3.5)
|
||||
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
|
||||
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
|
||||
//C- | 6,058,214 and patents pending.
|
||||
//C- |
|
||||
//C- | This software is subject to, and may be distributed under, the
|
||||
//C- | GNU General Public License, either Version 2 of the license,
|
||||
//C- | or (at your option) any later version. The license should have
|
||||
//C- | accompanied the software or you may obtain a copy of the license
|
||||
//C- | from the Free Software Foundation at http://www.fsf.org .
|
||||
//C- |
|
||||
//C- | The computer code originally released by LizardTech under this
|
||||
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
|
||||
//C- | ORIGINAL CODE." Subject to any third party intellectual property
|
||||
//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
|
||||
//C- | non-exclusive license to make, use, sell, or otherwise dispose of
|
||||
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
|
||||
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
|
||||
//C- | General Public License. This grant only confers the right to
|
||||
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
|
||||
//C- | the extent such infringement is reasonably necessary to enable
|
||||
//C- | recipient to make, have made, practice, sell, or otherwise dispose
|
||||
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
|
||||
//C- | any greater extent that may be necessary to utilize further
|
||||
//C- | modifications or combinations.
|
||||
//C- |
|
||||
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
|
||||
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
|
||||
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
//C- +------------------------------------------------------------------
|
||||
//
|
||||
// $Id: BSByteStream.cpp,v 1.9 2007/03/25 20:48:29 leonb Exp $
|
||||
// $Name: release_3_5_23 $
|
||||
'''
|
||||
|
||||
|
||||
MAXBLOCK = 4096
|
||||
FREQMAX = 4
|
||||
CTXIDS = 3
|
||||
MAXLEN = 1024 ** 2
|
||||
|
||||
# Exception classes used by this module.
|
||||
# Exception classes used by this module.
class BZZDecoderError(Exception):
    """Raised when BZZDecode runs into trouble (corrupt or truncated input)."""

    def __init__(self, msg):
        # Keep the message on an attribute so callers can inspect it
        # without parsing str(exc).
        self.msg = msg

    def __str__(self):
        return "BZZDecoderError: %s" % self.msg
|
||||
|
||||
|
||||
# Default state-transition table for the ZP arithmetic coder.  Each entry is
# a 4-tuple (p, m, up, dn): the LPS probability estimate, the MPS adaptation
# threshold, and the successor states after an MPS/LPS (the four columns are
# unpacked into parallel arrays by BZZDecoder.newtable()).  The table is
# machine generated -- do not edit the entries by hand.  It was produced
# by running the following command in file 'zptable.sn':
#   (fast-crude (steady-mat 0.0035 0.0002) 260)))
|
||||
default_ztable = [ # {{{
|
||||
(0x8000, 0x0000, 84, 145), # 000: p=0.500000 ( 0, 0)
|
||||
(0x8000, 0x0000, 3, 4), # 001: p=0.500000 ( 0, 0)
|
||||
(0x8000, 0x0000, 4, 3), # 002: p=0.500000 ( 0, 0)
|
||||
(0x6bbd, 0x10a5, 5, 1), # 003: p=0.465226 ( 0, 0)
|
||||
(0x6bbd, 0x10a5, 6, 2), # 004: p=0.465226 ( 0, 0)
|
||||
(0x5d45, 0x1f28, 7, 3), # 005: p=0.430708 ( 0, 0)
|
||||
(0x5d45, 0x1f28, 8, 4), # 006: p=0.430708 ( 0, 0)
|
||||
(0x51b9, 0x2bd3, 9, 5), # 007: p=0.396718 ( 0, 0)
|
||||
(0x51b9, 0x2bd3, 10, 6), # 008: p=0.396718 ( 0, 0)
|
||||
(0x4813, 0x36e3, 11, 7), # 009: p=0.363535 ( 0, 0)
|
||||
(0x4813, 0x36e3, 12, 8), # 010: p=0.363535 ( 0, 0)
|
||||
(0x3fd5, 0x408c, 13, 9), # 011: p=0.331418 ( 0, 0)
|
||||
(0x3fd5, 0x408c, 14, 10), # 012: p=0.331418 ( 0, 0)
|
||||
(0x38b1, 0x48fd, 15, 11), # 013: p=0.300585 ( 0, 0)
|
||||
(0x38b1, 0x48fd, 16, 12), # 014: p=0.300585 ( 0, 0)
|
||||
(0x3275, 0x505d, 17, 13), # 015: p=0.271213 ( 0, 0)
|
||||
(0x3275, 0x505d, 18, 14), # 016: p=0.271213 ( 0, 0)
|
||||
(0x2cfd, 0x56d0, 19, 15), # 017: p=0.243438 ( 0, 0)
|
||||
(0x2cfd, 0x56d0, 20, 16), # 018: p=0.243438 ( 0, 0)
|
||||
(0x2825, 0x5c71, 21, 17), # 019: p=0.217391 ( 0, 0)
|
||||
(0x2825, 0x5c71, 22, 18), # 020: p=0.217391 ( 0, 0)
|
||||
(0x23ab, 0x615b, 23, 19), # 021: p=0.193150 ( 0, 0)
|
||||
(0x23ab, 0x615b, 24, 20), # 022: p=0.193150 ( 0, 0)
|
||||
(0x1f87, 0x65a5, 25, 21), # 023: p=0.170728 ( 0, 0)
|
||||
(0x1f87, 0x65a5, 26, 22), # 024: p=0.170728 ( 0, 0)
|
||||
(0x1bbb, 0x6962, 27, 23), # 025: p=0.150158 ( 0, 0)
|
||||
(0x1bbb, 0x6962, 28, 24), # 026: p=0.150158 ( 0, 0)
|
||||
(0x1845, 0x6ca2, 29, 25), # 027: p=0.131418 ( 0, 0)
|
||||
(0x1845, 0x6ca2, 30, 26), # 028: p=0.131418 ( 0, 0)
|
||||
(0x1523, 0x6f74, 31, 27), # 029: p=0.114460 ( 0, 0)
|
||||
(0x1523, 0x6f74, 32, 28), # 030: p=0.114460 ( 0, 0)
|
||||
(0x1253, 0x71e6, 33, 29), # 031: p=0.099230 ( 0, 0)
|
||||
(0x1253, 0x71e6, 34, 30), # 032: p=0.099230 ( 0, 0)
|
||||
(0x0fcf, 0x7404, 35, 31), # 033: p=0.085611 ( 0, 0)
|
||||
(0x0fcf, 0x7404, 36, 32), # 034: p=0.085611 ( 0, 0)
|
||||
(0x0d95, 0x75d6, 37, 33), # 035: p=0.073550 ( 0, 0)
|
||||
(0x0d95, 0x75d6, 38, 34), # 036: p=0.073550 ( 0, 0)
|
||||
(0x0b9d, 0x7768, 39, 35), # 037: p=0.062888 ( 0, 0)
|
||||
(0x0b9d, 0x7768, 40, 36), # 038: p=0.062888 ( 0, 0)
|
||||
(0x09e3, 0x78c2, 41, 37), # 039: p=0.053539 ( 0, 0)
|
||||
(0x09e3, 0x78c2, 42, 38), # 040: p=0.053539 ( 0, 0)
|
||||
(0x0861, 0x79ea, 43, 39), # 041: p=0.045365 ( 0, 0)
|
||||
(0x0861, 0x79ea, 44, 40), # 042: p=0.045365 ( 0, 0)
|
||||
(0x0711, 0x7ae7, 45, 41), # 043: p=0.038272 ( 0, 0)
|
||||
(0x0711, 0x7ae7, 46, 42), # 044: p=0.038272 ( 0, 0)
|
||||
(0x05f1, 0x7bbe, 47, 43), # 045: p=0.032174 ( 0, 0)
|
||||
(0x05f1, 0x7bbe, 48, 44), # 046: p=0.032174 ( 0, 0)
|
||||
(0x04f9, 0x7c75, 49, 45), # 047: p=0.026928 ( 0, 0)
|
||||
(0x04f9, 0x7c75, 50, 46), # 048: p=0.026928 ( 0, 0)
|
||||
(0x0425, 0x7d0f, 51, 47), # 049: p=0.022444 ( 0, 0)
|
||||
(0x0425, 0x7d0f, 52, 48), # 050: p=0.022444 ( 0, 0)
|
||||
(0x0371, 0x7d91, 53, 49), # 051: p=0.018636 ( 0, 0)
|
||||
(0x0371, 0x7d91, 54, 50), # 052: p=0.018636 ( 0, 0)
|
||||
(0x02d9, 0x7dfe, 55, 51), # 053: p=0.015421 ( 0, 0)
|
||||
(0x02d9, 0x7dfe, 56, 52), # 054: p=0.015421 ( 0, 0)
|
||||
(0x0259, 0x7e5a, 57, 53), # 055: p=0.012713 ( 0, 0)
|
||||
(0x0259, 0x7e5a, 58, 54), # 056: p=0.012713 ( 0, 0)
|
||||
(0x01ed, 0x7ea6, 59, 55), # 057: p=0.010419 ( 0, 0)
|
||||
(0x01ed, 0x7ea6, 60, 56), # 058: p=0.010419 ( 0, 0)
|
||||
(0x0193, 0x7ee6, 61, 57), # 059: p=0.008525 ( 0, 0)
|
||||
(0x0193, 0x7ee6, 62, 58), # 060: p=0.008525 ( 0, 0)
|
||||
(0x0149, 0x7f1a, 63, 59), # 061: p=0.006959 ( 0, 0)
|
||||
(0x0149, 0x7f1a, 64, 60), # 062: p=0.006959 ( 0, 0)
|
||||
(0x010b, 0x7f45, 65, 61), # 063: p=0.005648 ( 0, 0)
|
||||
(0x010b, 0x7f45, 66, 62), # 064: p=0.005648 ( 0, 0)
|
||||
(0x00d5, 0x7f6b, 67, 63), # 065: p=0.004506 ( 0, 0)
|
||||
(0x00d5, 0x7f6b, 68, 64), # 066: p=0.004506 ( 0, 0)
|
||||
(0x00a5, 0x7f8d, 69, 65), # 067: p=0.003480 ( 0, 0)
|
||||
(0x00a5, 0x7f8d, 70, 66), # 068: p=0.003480 ( 0, 0)
|
||||
(0x007b, 0x7faa, 71, 67), # 069: p=0.002602 ( 0, 0)
|
||||
(0x007b, 0x7faa, 72, 68), # 070: p=0.002602 ( 0, 0)
|
||||
(0x0057, 0x7fc3, 73, 69), # 071: p=0.001843 ( 0, 0)
|
||||
(0x0057, 0x7fc3, 74, 70), # 072: p=0.001843 ( 0, 0)
|
||||
(0x003b, 0x7fd7, 75, 71), # 073: p=0.001248 ( 0, 0)
|
||||
(0x003b, 0x7fd7, 76, 72), # 074: p=0.001248 ( 0, 0)
|
||||
(0x0023, 0x7fe7, 77, 73), # 075: p=0.000749 ( 0, 0)
|
||||
(0x0023, 0x7fe7, 78, 74), # 076: p=0.000749 ( 0, 0)
|
||||
(0x0013, 0x7ff2, 79, 75), # 077: p=0.000402 ( 0, 0)
|
||||
(0x0013, 0x7ff2, 80, 76), # 078: p=0.000402 ( 0, 0)
|
||||
(0x0007, 0x7ffa, 81, 77), # 079: p=0.000153 ( 0, 0)
|
||||
(0x0007, 0x7ffa, 82, 78), # 080: p=0.000153 ( 0, 0)
|
||||
(0x0001, 0x7fff, 81, 79), # 081: p=0.000027 ( 0, 0)
|
||||
(0x0001, 0x7fff, 82, 80), # 082: p=0.000027 ( 0, 0)
|
||||
(0x5695, 0x0000, 9, 85), # 083: p=0.411764 ( 2, 3)
|
||||
(0x24ee, 0x0000, 86, 226), # 084: p=0.199988 ( 1, 0)
|
||||
(0x8000, 0x0000, 5, 6), # 085: p=0.500000 ( 3, 3)
|
||||
(0x0d30, 0x0000, 88, 176), # 086: p=0.071422 ( 4, 0)
|
||||
(0x481a, 0x0000, 89, 143), # 087: p=0.363634 ( 1, 2)
|
||||
(0x0481, 0x0000, 90, 138), # 088: p=0.024388 ( 13, 0)
|
||||
(0x3579, 0x0000, 91, 141), # 089: p=0.285711 ( 1, 3)
|
||||
(0x017a, 0x0000, 92, 112), # 090: p=0.007999 ( 41, 0)
|
||||
(0x24ef, 0x0000, 93, 135), # 091: p=0.199997 ( 1, 5)
|
||||
(0x007b, 0x0000, 94, 104), # 092: p=0.002611 ( 127, 0)
|
||||
(0x1978, 0x0000, 95, 133), # 093: p=0.137929 ( 1, 8)
|
||||
(0x0028, 0x0000, 96, 100), # 094: p=0.000849 ( 392, 0)
|
||||
(0x10ca, 0x0000, 97, 129), # 095: p=0.090907 ( 1, 13)
|
||||
(0x000d, 0x0000, 82, 98), # 096: p=0.000276 ( 1208, 0)
|
||||
(0x0b5d, 0x0000, 99, 127), # 097: p=0.061537 ( 1, 20)
|
||||
(0x0034, 0x0000, 76, 72), # 098: p=0.001102 ( 1208, 1)
|
||||
(0x078a, 0x0000, 101, 125), # 099: p=0.040815 ( 1, 31)
|
||||
(0x00a0, 0x0000, 70, 102), # 100: p=0.003387 ( 392, 1)
|
||||
(0x050f, 0x0000, 103, 123), # 101: p=0.027397 ( 1, 47)
|
||||
(0x0117, 0x0000, 66, 60), # 102: p=0.005912 ( 392, 2)
|
||||
(0x0358, 0x0000, 105, 121), # 103: p=0.018099 ( 1, 72)
|
||||
(0x01ea, 0x0000, 106, 110), # 104: p=0.010362 ( 127, 1)
|
||||
(0x0234, 0x0000, 107, 119), # 105: p=0.011940 ( 1, 110)
|
||||
(0x0144, 0x0000, 66, 108), # 106: p=0.006849 ( 193, 1)
|
||||
(0x0173, 0x0000, 109, 117), # 107: p=0.007858 ( 1, 168)
|
||||
(0x0234, 0x0000, 60, 54), # 108: p=0.011925 ( 193, 2)
|
||||
(0x00f5, 0x0000, 111, 115), # 109: p=0.005175 ( 1, 256)
|
||||
(0x0353, 0x0000, 56, 48), # 110: p=0.017995 ( 127, 2)
|
||||
(0x00a1, 0x0000, 69, 113), # 111: p=0.003413 ( 1, 389)
|
||||
(0x05c5, 0x0000, 114, 134), # 112: p=0.031249 ( 41, 1)
|
||||
(0x011a, 0x0000, 65, 59), # 113: p=0.005957 ( 2, 389)
|
||||
(0x03cf, 0x0000, 116, 132), # 114: p=0.020618 ( 63, 1)
|
||||
(0x01aa, 0x0000, 61, 55), # 115: p=0.009020 ( 2, 256)
|
||||
(0x0285, 0x0000, 118, 130), # 116: p=0.013652 ( 96, 1)
|
||||
(0x0286, 0x0000, 57, 51), # 117: p=0.013672 ( 2, 168)
|
||||
(0x01ab, 0x0000, 120, 128), # 118: p=0.009029 ( 146, 1)
|
||||
(0x03d3, 0x0000, 53, 47), # 119: p=0.020710 ( 2, 110)
|
||||
(0x011a, 0x0000, 122, 126), # 120: p=0.005961 ( 222, 1)
|
||||
(0x05c5, 0x0000, 49, 41), # 121: p=0.031250 ( 2, 72)
|
||||
(0x00ba, 0x0000, 124, 62), # 122: p=0.003925 ( 338, 1)
|
||||
(0x08ad, 0x0000, 43, 37), # 123: p=0.046979 ( 2, 47)
|
||||
(0x007a, 0x0000, 72, 66), # 124: p=0.002586 ( 514, 1)
|
||||
(0x0ccc, 0x0000, 39, 31), # 125: p=0.069306 ( 2, 31)
|
||||
(0x01eb, 0x0000, 60, 54), # 126: p=0.010386 ( 222, 2)
|
||||
(0x1302, 0x0000, 33, 25), # 127: p=0.102940 ( 2, 20)
|
||||
(0x02e6, 0x0000, 56, 50), # 128: p=0.015695 ( 146, 2)
|
||||
(0x1b81, 0x0000, 29, 131), # 129: p=0.148935 ( 2, 13)
|
||||
(0x045e, 0x0000, 52, 46), # 130: p=0.023648 ( 96, 2)
|
||||
(0x24ef, 0x0000, 23, 17), # 131: p=0.199999 ( 3, 13)
|
||||
(0x0690, 0x0000, 48, 40), # 132: p=0.035533 ( 63, 2)
|
||||
(0x2865, 0x0000, 23, 15), # 133: p=0.218748 ( 2, 8)
|
||||
(0x09de, 0x0000, 42, 136), # 134: p=0.053434 ( 41, 2)
|
||||
(0x3987, 0x0000, 137, 7), # 135: p=0.304346 ( 2, 5)
|
||||
(0x0dc8, 0x0000, 38, 32), # 136: p=0.074626 ( 41, 3)
|
||||
(0x2c99, 0x0000, 21, 139), # 137: p=0.241378 ( 2, 7)
|
||||
(0x10ca, 0x0000, 140, 172), # 138: p=0.090907 ( 13, 1)
|
||||
(0x3b5f, 0x0000, 15, 9), # 139: p=0.312499 ( 3, 7)
|
||||
(0x0b5d, 0x0000, 142, 170), # 140: p=0.061537 ( 20, 1)
|
||||
(0x5695, 0x0000, 9, 85), # 141: p=0.411764 ( 2, 3)
|
||||
(0x078a, 0x0000, 144, 168), # 142: p=0.040815 ( 31, 1)
|
||||
(0x8000, 0x0000, 141, 248), # 143: p=0.500000 ( 2, 2)
|
||||
(0x050f, 0x0000, 146, 166), # 144: p=0.027397 ( 47, 1)
|
||||
(0x24ee, 0x0000, 147, 247), # 145: p=0.199988 ( 0, 1)
|
||||
(0x0358, 0x0000, 148, 164), # 146: p=0.018099 ( 72, 1)
|
||||
(0x0d30, 0x0000, 149, 197), # 147: p=0.071422 ( 0, 4)
|
||||
(0x0234, 0x0000, 150, 162), # 148: p=0.011940 ( 110, 1)
|
||||
(0x0481, 0x0000, 151, 95), # 149: p=0.024388 ( 0, 13)
|
||||
(0x0173, 0x0000, 152, 160), # 150: p=0.007858 ( 168, 1)
|
||||
(0x017a, 0x0000, 153, 173), # 151: p=0.007999 ( 0, 41)
|
||||
(0x00f5, 0x0000, 154, 158), # 152: p=0.005175 ( 256, 1)
|
||||
(0x007b, 0x0000, 155, 165), # 153: p=0.002611 ( 0, 127)
|
||||
(0x00a1, 0x0000, 70, 156), # 154: p=0.003413 ( 389, 1)
|
||||
(0x0028, 0x0000, 157, 161), # 155: p=0.000849 ( 0, 392)
|
||||
(0x011a, 0x0000, 66, 60), # 156: p=0.005957 ( 389, 2)
|
||||
(0x000d, 0x0000, 81, 159), # 157: p=0.000276 ( 0, 1208)
|
||||
(0x01aa, 0x0000, 62, 56), # 158: p=0.009020 ( 256, 2)
|
||||
(0x0034, 0x0000, 75, 71), # 159: p=0.001102 ( 1, 1208)
|
||||
(0x0286, 0x0000, 58, 52), # 160: p=0.013672 ( 168, 2)
|
||||
(0x00a0, 0x0000, 69, 163), # 161: p=0.003387 ( 1, 392)
|
||||
(0x03d3, 0x0000, 54, 48), # 162: p=0.020710 ( 110, 2)
|
||||
(0x0117, 0x0000, 65, 59), # 163: p=0.005912 ( 2, 392)
|
||||
(0x05c5, 0x0000, 50, 42), # 164: p=0.031250 ( 72, 2)
|
||||
(0x01ea, 0x0000, 167, 171), # 165: p=0.010362 ( 1, 127)
|
||||
(0x08ad, 0x0000, 44, 38), # 166: p=0.046979 ( 47, 2)
|
||||
(0x0144, 0x0000, 65, 169), # 167: p=0.006849 ( 1, 193)
|
||||
(0x0ccc, 0x0000, 40, 32), # 168: p=0.069306 ( 31, 2)
|
||||
(0x0234, 0x0000, 59, 53), # 169: p=0.011925 ( 2, 193)
|
||||
(0x1302, 0x0000, 34, 26), # 170: p=0.102940 ( 20, 2)
|
||||
(0x0353, 0x0000, 55, 47), # 171: p=0.017995 ( 2, 127)
|
||||
(0x1b81, 0x0000, 30, 174), # 172: p=0.148935 ( 13, 2)
|
||||
(0x05c5, 0x0000, 175, 193), # 173: p=0.031249 ( 1, 41)
|
||||
(0x24ef, 0x0000, 24, 18), # 174: p=0.199999 ( 13, 3)
|
||||
(0x03cf, 0x0000, 177, 191), # 175: p=0.020618 ( 1, 63)
|
||||
(0x2b74, 0x0000, 178, 222), # 176: p=0.235291 ( 4, 1)
|
||||
(0x0285, 0x0000, 179, 189), # 177: p=0.013652 ( 1, 96)
|
||||
(0x201d, 0x0000, 180, 218), # 178: p=0.173910 ( 6, 1)
|
||||
(0x01ab, 0x0000, 181, 187), # 179: p=0.009029 ( 1, 146)
|
||||
(0x1715, 0x0000, 182, 216), # 180: p=0.124998 ( 9, 1)
|
||||
(0x011a, 0x0000, 183, 185), # 181: p=0.005961 ( 1, 222)
|
||||
(0x0fb7, 0x0000, 184, 214), # 182: p=0.085105 ( 14, 1)
|
||||
(0x00ba, 0x0000, 69, 61), # 183: p=0.003925 ( 1, 338)
|
||||
(0x0a67, 0x0000, 186, 212), # 184: p=0.056337 ( 22, 1)
|
||||
(0x01eb, 0x0000, 59, 53), # 185: p=0.010386 ( 2, 222)
|
||||
(0x06e7, 0x0000, 188, 210), # 186: p=0.037382 ( 34, 1)
|
||||
(0x02e6, 0x0000, 55, 49), # 187: p=0.015695 ( 2, 146)
|
||||
(0x0496, 0x0000, 190, 208), # 188: p=0.024844 ( 52, 1)
|
||||
(0x045e, 0x0000, 51, 45), # 189: p=0.023648 ( 2, 96)
|
||||
(0x030d, 0x0000, 192, 206), # 190: p=0.016529 ( 79, 1)
|
||||
(0x0690, 0x0000, 47, 39), # 191: p=0.035533 ( 2, 63)
|
||||
(0x0206, 0x0000, 194, 204), # 192: p=0.010959 ( 120, 1)
|
||||
(0x09de, 0x0000, 41, 195), # 193: p=0.053434 ( 2, 41)
|
||||
(0x0155, 0x0000, 196, 202), # 194: p=0.007220 ( 183, 1)
|
||||
(0x0dc8, 0x0000, 37, 31), # 195: p=0.074626 ( 3, 41)
|
||||
(0x00e1, 0x0000, 198, 200), # 196: p=0.004750 ( 279, 1)
|
||||
(0x2b74, 0x0000, 199, 243), # 197: p=0.235291 ( 1, 4)
|
||||
(0x0094, 0x0000, 72, 64), # 198: p=0.003132 ( 424, 1)
|
||||
(0x201d, 0x0000, 201, 239), # 199: p=0.173910 ( 1, 6)
|
||||
(0x0188, 0x0000, 62, 56), # 200: p=0.008284 ( 279, 2)
|
||||
(0x1715, 0x0000, 203, 237), # 201: p=0.124998 ( 1, 9)
|
||||
(0x0252, 0x0000, 58, 52), # 202: p=0.012567 ( 183, 2)
|
||||
(0x0fb7, 0x0000, 205, 235), # 203: p=0.085105 ( 1, 14)
|
||||
(0x0383, 0x0000, 54, 48), # 204: p=0.019021 ( 120, 2)
|
||||
(0x0a67, 0x0000, 207, 233), # 205: p=0.056337 ( 1, 22)
|
||||
(0x0547, 0x0000, 50, 44), # 206: p=0.028571 ( 79, 2)
|
||||
(0x06e7, 0x0000, 209, 231), # 207: p=0.037382 ( 1, 34)
|
||||
(0x07e2, 0x0000, 46, 38), # 208: p=0.042682 ( 52, 2)
|
||||
(0x0496, 0x0000, 211, 229), # 209: p=0.024844 ( 1, 52)
|
||||
(0x0bc0, 0x0000, 40, 34), # 210: p=0.063636 ( 34, 2)
|
||||
(0x030d, 0x0000, 213, 227), # 211: p=0.016529 ( 1, 79)
|
||||
(0x1178, 0x0000, 36, 28), # 212: p=0.094593 ( 22, 2)
|
||||
(0x0206, 0x0000, 215, 225), # 213: p=0.010959 ( 1, 120)
|
||||
(0x19da, 0x0000, 30, 22), # 214: p=0.139999 ( 14, 2)
|
||||
(0x0155, 0x0000, 217, 223), # 215: p=0.007220 ( 1, 183)
|
||||
(0x24ef, 0x0000, 26, 16), # 216: p=0.199998 ( 9, 2)
|
||||
(0x00e1, 0x0000, 219, 221), # 217: p=0.004750 ( 1, 279)
|
||||
(0x320e, 0x0000, 20, 220), # 218: p=0.269229 ( 6, 2)
|
||||
(0x0094, 0x0000, 71, 63), # 219: p=0.003132 ( 1, 424)
|
||||
(0x432a, 0x0000, 14, 8), # 220: p=0.344827 ( 6, 3)
|
||||
(0x0188, 0x0000, 61, 55), # 221: p=0.008284 ( 2, 279)
|
||||
(0x447d, 0x0000, 14, 224), # 222: p=0.349998 ( 4, 2)
|
||||
(0x0252, 0x0000, 57, 51), # 223: p=0.012567 ( 2, 183)
|
||||
(0x5ece, 0x0000, 8, 2), # 224: p=0.434782 ( 4, 3)
|
||||
(0x0383, 0x0000, 53, 47), # 225: p=0.019021 ( 2, 120)
|
||||
(0x8000, 0x0000, 228, 87), # 226: p=0.500000 ( 1, 1)
|
||||
(0x0547, 0x0000, 49, 43), # 227: p=0.028571 ( 2, 79)
|
||||
(0x481a, 0x0000, 230, 246), # 228: p=0.363634 ( 2, 1)
|
||||
(0x07e2, 0x0000, 45, 37), # 229: p=0.042682 ( 2, 52)
|
||||
(0x3579, 0x0000, 232, 244), # 230: p=0.285711 ( 3, 1)
|
||||
(0x0bc0, 0x0000, 39, 33), # 231: p=0.063636 ( 2, 34)
|
||||
(0x24ef, 0x0000, 234, 238), # 232: p=0.199997 ( 5, 1)
|
||||
(0x1178, 0x0000, 35, 27), # 233: p=0.094593 ( 2, 22)
|
||||
(0x1978, 0x0000, 138, 236), # 234: p=0.137929 ( 8, 1)
|
||||
(0x19da, 0x0000, 29, 21), # 235: p=0.139999 ( 2, 14)
|
||||
(0x2865, 0x0000, 24, 16), # 236: p=0.218748 ( 8, 2)
|
||||
(0x24ef, 0x0000, 25, 15), # 237: p=0.199998 ( 2, 9)
|
||||
(0x3987, 0x0000, 240, 8), # 238: p=0.304346 ( 5, 2)
|
||||
(0x320e, 0x0000, 19, 241), # 239: p=0.269229 ( 2, 6)
|
||||
(0x2c99, 0x0000, 22, 242), # 240: p=0.241378 ( 7, 2)
|
||||
(0x432a, 0x0000, 13, 7), # 241: p=0.344827 ( 3, 6)
|
||||
(0x3b5f, 0x0000, 16, 10), # 242: p=0.312499 ( 7, 3)
|
||||
(0x447d, 0x0000, 13, 245), # 243: p=0.349998 ( 2, 4)
|
||||
(0x5695, 0x0000, 10, 2), # 244: p=0.411764 ( 3, 2)
|
||||
(0x5ece, 0x0000, 7, 1), # 245: p=0.434782 ( 3, 4)
|
||||
(0x8000, 0x0000, 244, 83), # 246: p=0.500000 ( 2, 2)
|
||||
(0x8000, 0x0000, 249, 250), # 247: p=0.500000 ( 1, 1)
|
||||
(0x5695, 0x0000, 10, 2), # 248: p=0.411764 ( 3, 2)
|
||||
(0x481a, 0x0000, 89, 143), # 249: p=0.363634 ( 1, 2)
|
||||
(0x481a, 0x0000, 230, 246), # 250: p=0.363634 ( 2, 1)
|
||||
(0, 0, 0, 0),
|
||||
(0, 0, 0, 0),
|
||||
(0, 0, 0, 0),
|
||||
(0, 0, 0, 0),
|
||||
(0, 0, 0, 0),
|
||||
]
|
||||
|
||||
|
||||
# Initial move-to-front table: the identity permutation over all 256 byte
# values (every symbol starts out ranked at its own value).  decode() takes
# a mutable copy of this each block.
xmtf = tuple(range(256))
|
||||
# }}}
|
||||
|
||||
def chr3(l):
    """Return the iterable of byte values *l* packed into a bytes object.

    Works on both Python 2 and 3 because bytes(bytearray(...)) has the
    same meaning in both.
    """
    buf = bytearray(l)
    return bytes(buf)
|
||||
|
||||
class BZZDecoder():
    """Decoder for the BZZ general purpose compressor used by DJVU.

    This is a port of DjVuLibre's BSByteStream (see the BSByteStream.cpp
    header above): blocks are blocksort (Burrows-Wheeler) transformed and
    their move-to-front ranks entropy coded with the ZP adaptive binary
    arithmetic coder.  Compressed bytes are read from ``infile`` and the
    decoded bytes are written to ``outfile`` by convert().
    """

    def __init__(self, infile, outfile):
        self.instream = infile    # compressed input stream (needs .read())
        self.outf = outfile       # decoded output stream (needs .write())
        self.ieof = False         # set once the final (empty) block is seen
        self.bptr = None          # read cursor into outbuf
        self.xsize = None         # bytes remaining in the current block
        self.outbuf = [0] * (MAXBLOCK * 1024)  # decoded block (byte values)
        self.byte = None          # last byte read from the input
        self.scount = 0           # number of valid bits in bufint
        self.delay = 25           # EOF grace counter, see preload()
        self.a = 0                # ZP-coder interval register
        self.code = 0             # ZP-coder code register
        self.bufint = 0           # bit buffer feeding the coder
        self.ctx = [0] * 300      # adaptive contexts for the MTF rank coder
        # ZP-coder state table, split into four parallel arrays
        # (filled from default_ztable by newtable()).
        self.p = [0] * 256        # LPS probability estimate per state
        self.m = [0] * 256        # MPS adaptation threshold per state
        self.up = [0] * 256       # successor state after an MPS
        self.dn = [0] * 256       # successor state after an LPS
        # Machine independent "find first zero" table:
        # ffzt[b] is the number of leading one bits in byte b.
        self.ffzt = [0] * 256
        for i in range(256):
            j = i
            while j & 0x80:
                self.ffzt[i] += 1
                j <<= 1
        # Initialize table
        self.newtable(default_ztable)
        # Read first 16 bits of code; missing input is padded with 0xff,
        # as in the reference implementation.
        if not self.read_byte():
            self.byte = 0xff
        self.code = (self.byte << 8)
        if not self.read_byte():
            self.byte = 0xff
        self.code = self.code | self.byte
        # Preload buffer
        self.preload()
        # Compute initial fence (fast-path bound used by zpcodec_decode)
        self.fence = self.code
        if self.code >= 0x8000:
            self.fence = 0x7fff

    def convert(self, sz):
        """Decode up to ``sz`` bytes into the output stream.

        Returns the number of bytes written; 0 once the end of the
        compressed stream has been reached.
        """
        if self.ieof:
            return 0
        copied = 0
        while sz > 0 and not self.ieof:
            # Decode the next block if the previous one is exhausted
            if not self.xsize:
                self.bptr = 0
                if not self.decode():  # input block size set in decode
                    self.xsize = 1
                    self.ieof = True
                self.xsize -= 1

            # Compute how much of the current block we can hand out.
            # (Renamed from ``bytes``, which shadowed the builtin.)
            count = self.xsize
            if count > sz:
                count = sz
            # Transfer
            if count:
                # BUGFIX: the old loop wrote chr3() of each *individual int*,
                # and bytes(bytearray(n)) for an int n is n NUL bytes -- so it
                # emitted runs of zeros instead of the decoded data.  Passing
                # the slice (a list of byte values) produces the real bytes,
                # in a single write.
                self.outf.write(chr3(self.outbuf[self.bptr:self.bptr + count]))
            self.xsize -= count
            self.bptr += count
            sz -= count
            copied += count
            # offset += count; // for tell()
        return copied

    def preload(self):
        """Refill the coder's bit buffer to at least 25 valid bits."""
        while self.scount <= 24:
            if self.read_byte() < 1:
                # Past end of input: pad with 0xff, but only tolerate a
                # limited number of such reads before declaring corruption.
                self.byte = 0xff
                # BUGFIX: the original wrote ``--self.delay``, which in
                # Python is double unary minus (a no-op), so the counter
                # never decreased and this guard could never fire.  The C
                # original is ``if (--delay < 1) throw ...``.
                self.delay -= 1
                if self.delay < 1:
                    raise BZZDecoderError("BiteStream EOF")
            self.bufint = (self.bufint << 8) | self.byte
            self.scount += 8

    def newtable(self, table):
        """Unpack a ZP-coder state table into the parallel p/m/up/dn arrays."""
        for i in range(256):
            self.p[i] = table[i][0]
            self.m[i] = table[i][1]
            self.up[i] = table[i][2]
            self.dn[i] = table[i][3]

    def decode(self):
        """Decode one blocksorted block into ``outbuf``.

        Returns the block size (0 at end of stream).  Raises
        BZZDecoderError on corrupt input.
        """
        outbuf = self.outbuf
        # Decode block size
        self.xsize = self.decode_raw(24)
        if not self.xsize:
            return 0
        if self.xsize > MAXBLOCK * 1024:  # 4MB (4096 * 1024) is max block
            raise BZZDecoderError("BiteStream.corrupt")
        # Decode estimation speed (0, 1 or 2); nested as in BSByteStream.cpp
        fshift = 0
        if self.zpcodec_decoder():
            fshift += 1
            if self.zpcodec_decoder():
                fshift += 1
        # Prepare quasi MTF
        mtf = list(xmtf)  # mutable copy of the identity permutation
        freq = [0] * FREQMAX
        fadd = 4
        # Decode the MTF rank of every symbol.  Ranks 0 and 1 have their
        # own contexts; higher ranks are coded as a bucket selector plus
        # decode_binary() bits within the bucket.
        mtfno = 3
        markerpos = -1
        for i in range(self.xsize):
            ctxid = CTXIDS - 1
            if ctxid > mtfno:
                ctxid = mtfno
            cx = self.ctx
            if self.zpcodec_decode(cx, ctxid):
                mtfno = 0
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, ctxid + CTXIDS):
                mtfno = 1
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS):
                mtfno = 2 + self.decode_binary(cx, 2*CTXIDS + 1, 1)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 2):
                mtfno = 4 + self.decode_binary(cx, 2*CTXIDS + 2 + 1, 2)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 6):
                mtfno = 8 + self.decode_binary(cx, 2*CTXIDS + 6 + 1, 3)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 14):
                mtfno = 16 + self.decode_binary(cx, 2*CTXIDS + 14 + 1, 4)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 30):
                mtfno = 32 + self.decode_binary(cx, 2*CTXIDS + 30 + 1, 5)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 62):
                mtfno = 64 + self.decode_binary(cx, 2*CTXIDS + 62 + 1, 6)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 126):
                mtfno = 128 + self.decode_binary(cx, 2*CTXIDS + 126 + 1, 7)
                outbuf[i] = mtf[mtfno]
            else:
                mtfno = 256  # EOB
                outbuf[i] = 0
                markerpos = i
                continue

            # Rotate mtf according to empirical frequencies.
            # Adjust frequencies for overflow.
            fadd = fadd + (fadd >> fshift)
            if fadd > 0x10000000:
                fadd >>= 24
                freq[0] >>= 24
                freq[1] >>= 24
                freq[2] >>= 24
                freq[3] >>= 24
                for k in range(4, FREQMAX):
                    freq[k] = freq[k] >> 24
            # Relocate the new char according to its new frequency.
            fc = fadd
            if mtfno < FREQMAX:
                fc += freq[mtfno]
            k = mtfno
            while k >= FREQMAX:
                mtf[k] = mtf[k - 1]
                k -= 1
            while k > 0 and fc >= freq[k - 1]:
                mtf[k] = mtf[k - 1]
                freq[k] = freq[k - 1]
                k -= 1
            mtf[k] = outbuf[i]
            freq[k] = fc

        # //////// Reconstruct the string (inverse blocksort transform)
        if markerpos < 1 or markerpos >= self.xsize:
            raise BZZDecoderError("BiteStream.corrupt")
        # posn[i] packs the character in the top byte and its running
        # occurrence count in the low 24 bits; the marker slot is skipped.
        posn = [0] * self.xsize
        count = [0] * 256
        for i in range(markerpos):
            c = outbuf[i]
            posn[i] = (c << 24) | (count[c] & 0xffffff)
            count[c] += 1
        for i in range(markerpos + 1, self.xsize):
            c = outbuf[i]
            posn[i] = (c << 24) | (count[c] & 0xffffff)
            count[c] += 1
        # Compute sorted char positions (prefix sums; start at 1 to
        # account for the EOB marker).
        last = 1
        for i in range(256):
            tmp = count[i]
            count[i] = last
            last += tmp
        # Undo the sort transform by walking the permutation backwards.
        i = 0
        last = self.xsize - 1
        while last > 0:
            n = posn[i]
            c = posn[i] >> 24
            last -= 1
            outbuf[last] = c
            i = count[c] + (n & 0xffffff)
        # The walk must terminate exactly at the marker position.
        if i != markerpos:
            raise BZZDecoderError("BiteStream.corrupt")
        return self.xsize

    def decode_raw(self, bits):
        """Decode ``bits`` equiprobable bits and return them as an integer."""
        n = 1
        m = 1 << bits
        while n < m:
            b = self.zpcodec_decoder()
            n = (n << 1) | b
        return n - m

    def decode_binary(self, ctx, index, bits):
        """Decode ``bits`` adaptive bits from the context tree rooted at ``index``."""
        n = 1
        m = 1 << bits
        while n < m:
            b = self.zpcodec_decode(ctx, index + n - 1)
            n = (n << 1) | b
        return n - m

    def zpcodec_decoder(self):
        """Decode one bit in pass-through (fixed 50/50 probability) mode."""
        return self.decode_sub_simple(0, 0x8000 + (self.a >> 1))

    def decode_sub_simple(self, mps, z):
        """ZP-coder decode + renormalization for the non-adaptive case."""
        # Test MPS/LPS
        if z > self.code:
            # LPS branch
            z = 0x10000 - z
            self.a += z
            self.code = self.code + z
            # LPS renormalization: shift out all leading one bits at once
            shift = self.ffz()
            self.scount -= shift
            self.a = (self.a << shift) & 0xffff
            self.code = ((self.code << shift) |
                    ((self.bufint >> self.scount) & ((1 << shift) - 1))) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            result = mps ^ 1
        else:
            # MPS renormalization: shift in a single bit
            self.scount -= 1
            self.a = (z << 1) & 0xffff
            self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1)) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            result = mps
        return result

    def decode_sub(self, ctx, index, z):
        """ZP-coder slow path: decode one adaptive bit and update context ``index``."""
        # Save bit (the context's current MPS value)
        bit = ctx[index] & 1
        # Avoid interval reversion
        d = 0x6000 + ((z + self.a) >> 2)
        if z > d:
            z = d
        # Test MPS/LPS
        if z > self.code:
            # LPS branch
            z = 0x10000 - z
            self.a += z
            self.code = self.code + z
            # LPS adaptation: move the context to its LPS successor state
            ctx[index] = self.dn[ctx[index]]
            # LPS renormalization
            shift = self.ffz()
            self.scount -= shift
            self.a = (self.a << shift) & 0xffff
            self.code = ((self.code << shift) | ((self.bufint >> self.scount) & ((1 << shift) - 1))) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            return bit ^ 1
        else:
            # MPS adaptation
            if self.a >= self.m[ctx[index]]:
                ctx[index] = self.up[ctx[index]]
            # MPS renormalization
            self.scount -= 1
            self.a = (z << 1) & 0xffff
            self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1)) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            return bit

    def zpcodec_decode(self, ctx, index):
        """Decode one adaptive bit using context ``index``."""
        z = self.a + self.p[ctx[index]]
        if z <= self.fence:
            # Fast path: no renormalization or adaptation needed
            self.a = z
            return ctx[index] & 1
        return self.decode_sub(ctx, index, z)

    def read_byte(self):
        """Read one input byte into ``self.byte``.

        Returns the number of bytes read (0 at EOF -- callers pad with
        0xff in that case).  Raises NotImplementedError if the decoder
        was constructed without an input stream.
        """
        res = 0
        if self.instream:
            ires = self.instream.read(1)
            res = len(ires)
            if res:
                # Slice (rather than index) so this works for both a
                # Python 2 str and a Python 3 bytes object.
                self.byte = ord(ires[0:1])
        else:
            raise NotImplementedError
        return res

    def ffz(self):
        """Return the number of leading one bits of the 16-bit register ``a``."""
        x = self.a
        if x >= 0xff00:
            return self.ffzt[x & 0xff] + 8
        return self.ffzt[(x >> 8) & 0xff]
|
||||
|
||||
|
||||
|
||||
### for testing
|
||||
|
||||
def main():
    """Command line driver: BZZ-decompress argv[1] into argv[2]."""
    import sys
    # open() instead of the Python 2-only file() builtin; the context
    # managers guarantee both handles are closed even if decoding raises
    # (the original leaked both file objects).
    with open(sys.argv[1], "rb") as infile, open(sys.argv[2], "wb") as outfile:
        dec = BZZDecoder(infile, outfile)
        while True:
            res = dec.convert(1024 * 1024)
            if not res:
                break


if __name__ == "__main__":
    main()
|
87
src/calibre/ebooks/djvu/input.py
Normal file
87
src/calibre/ebooks/djvu/input.py
Normal file
@ -0,0 +1,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2011, Anthon van der Neut <anthon@mnt.org>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
from subprocess import Popen, PIPE
|
||||
from cStringIO import StringIO
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.txt.processor import convert_basic
|
||||
|
||||
class DJVUInput(InputFormatPlugin):
    """Convert an OCR-ed DJVU file to OEB by extracting its hidden text layer.

    The text is pulled out either with the external ``djvutxt`` tool (much
    faster) or, as a fallback, with the pure python DJVUFile parser; the
    result is HTMLized and run through the regular HTML input plugin.
    """

    name = 'DJVU Input'
    author = 'Anthon van der Neut'
    description = 'Convert OCR-ed DJVU files (.djvu) to HTML'
    file_types = set(['djvu', 'djv'])

    options = set([
        OptionRecommendation(name='use_djvutxt', recommended_value=True,
            help=_('Try to use the djvutxt program and fall back to pure '
                'python implementation if it fails or is not available')),
    ])

    def convert(self, stream, options, file_ext, log, accelerators):
        stdout = StringIO()
        ppdjvu = True
        # Using djvutxt is MUCH faster than the pure python parser.
        if options.use_djvutxt and os.path.exists('/usr/bin/djvutxt'):
            from calibre.ptempfile import PersistentTemporaryFile
            try:
                fp = PersistentTemporaryFile(suffix='.djvu', prefix='djv_input')
                filename = fp._name
                fp.write(stream.read())
                fp.close()
                cmd = ['djvutxt', filename]
                stdout.write(Popen(cmd, stdout=PIPE, close_fds=True).communicate()[0])
                os.remove(filename)
                ppdjvu = False
            except Exception:
                # Best effort: retry with the pure python converter.  Narrowed
                # from a bare except so KeyboardInterrupt/SystemExit propagate.
                stream.seek(0)
        if ppdjvu:
            from .djvu import DJVUFile
            x = DJVUFile(stream)
            x.get_text(stdout)

        # \037 (unit separator) delimits pages in the extracted text: join
        # the lines within a page and turn page breaks into blank lines.
        html = convert_basic(stdout.getvalue().replace(b"\n", b' ').replace(
            b'\037', b'\n\n'))
        # Run the HTMLized text through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        base = os.getcwdu()
        if file_ext != 'txtz' and hasattr(stream, 'name'):
            # NOTE(review): the 'txtz' check looks copied from the TXT input
            # plugin; file_ext should always be djvu/djv here -- confirm.
            base = os.path.dirname(stream.name)
        # Pick an index*.html name that does not already exist in base.
        fname = os.path.join(base, 'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            # BUGFIX: the fallback names were previously built relative to
            # the current directory ('index%d.html' % c) instead of base,
            # so both the existence check and the write happened in cwd.
            fname = os.path.join(base, 'index%d.html' % c)
        htmlfile = open(fname, 'wb')
        with htmlfile:
            htmlfile.write(html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion.
        with open(htmlfile.name, 'rb') as f:
            oeb = html_input.convert(f, options, 'html', log,
                    {})
        options.debug_pipeline = odi
        os.remove(htmlfile.name)

        # Set metadata from file.
        from calibre.customize.ui import get_file_type_metadata
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        return oeb
|
||||
|
@ -127,7 +127,7 @@ class FB2Input(InputFormatPlugin):
|
||||
def extract_embedded_content(self, doc):
|
||||
self.binary_map = {}
|
||||
for elem in doc.xpath('./*'):
|
||||
if 'binary' in elem.tag and elem.attrib.has_key('id'):
|
||||
if elem.text and 'binary' in elem.tag and elem.attrib.has_key('id'):
|
||||
ct = elem.get('content-type', '')
|
||||
fname = elem.attrib['id']
|
||||
ext = ct.rpartition('/')[-1].lower()
|
||||
|
@ -30,9 +30,11 @@ class Worker(Thread): # Get details {{{
|
||||
Get book details from amazons book page in a separate thread
|
||||
'''
|
||||
|
||||
def __init__(self, url, result_queue, browser, log, relevance, domain, plugin, timeout=20):
|
||||
def __init__(self, url, result_queue, browser, log, relevance, domain,
|
||||
plugin, timeout=20, testing=False):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
self.testing = testing
|
||||
self.url, self.result_queue = url, result_queue
|
||||
self.log, self.timeout = log, timeout
|
||||
self.relevance, self.plugin = relevance, plugin
|
||||
@ -189,10 +191,9 @@ class Worker(Thread): # Get details {{{
|
||||
self.log.exception(msg)
|
||||
return
|
||||
|
||||
oraw = raw
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
resolve_entities=True)[0]
|
||||
#open('/t/t.html', 'wb').write(raw)
|
||||
|
||||
if '<title>404 - ' in raw:
|
||||
self.log.error('URL malformed: %r'%self.url)
|
||||
return
|
||||
@ -211,14 +212,20 @@ class Worker(Thread): # Get details {{{
|
||||
self.log.error(msg)
|
||||
return
|
||||
|
||||
self.parse_details(root)
|
||||
self.parse_details(oraw, root)
|
||||
|
||||
def parse_details(self, root):
|
||||
def parse_details(self, raw, root):
|
||||
try:
|
||||
asin = self.parse_asin(root)
|
||||
except:
|
||||
self.log.exception('Error parsing asin for url: %r'%self.url)
|
||||
asin = None
|
||||
if self.testing:
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile(prefix=asin + '_',
|
||||
suffix='.html', delete=False) as f:
|
||||
f.write(raw)
|
||||
print ('Downloaded html for', asin, 'saved in', f.name)
|
||||
|
||||
try:
|
||||
title = self.parse_title(root)
|
||||
@ -310,7 +317,7 @@ class Worker(Thread): # Get details {{{
|
||||
return l.get('href').rpartition('/')[-1]
|
||||
|
||||
def parse_title(self, root):
|
||||
tdiv = root.xpath('//h1[@class="parseasinTitle"]')[0]
|
||||
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
|
||||
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
|
||||
if actual_title:
|
||||
title = tostring(actual_title[0], encoding=unicode,
|
||||
@ -320,11 +327,11 @@ class Worker(Thread): # Get details {{{
|
||||
return re.sub(r'[(\[].*[)\]]', '', title).strip()
|
||||
|
||||
def parse_authors(self, root):
|
||||
x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
|
||||
x = '//h1[contains(@class, "parseasinTitle")]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
|
||||
aname = root.xpath(x)
|
||||
if not aname:
|
||||
aname = root.xpath('''
|
||||
//h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
|
||||
//h1[contains(@class, "parseasinTitle")]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
|
||||
''')
|
||||
for x in aname:
|
||||
x.tail = ''
|
||||
@ -666,7 +673,8 @@ class Amazon(Source):
|
||||
log.error('No matches found with query: %r'%query)
|
||||
return
|
||||
|
||||
workers = [Worker(url, result_queue, br, log, i, domain, self) for i, url in
|
||||
workers = [Worker(url, result_queue, br, log, i, domain, self,
|
||||
testing=getattr(self, 'running_a_test', False)) for i, url in
|
||||
enumerate(matches)]
|
||||
|
||||
for w in workers:
|
||||
@ -740,16 +748,6 @@ if __name__ == '__main__': # tests {{{
|
||||
|
||||
),
|
||||
|
||||
( # An e-book ISBN not on Amazon, the title/author search matches
|
||||
# the Kindle edition, which has different markup for ratings and
|
||||
# isbn
|
||||
{'identifiers':{'isbn': '9780307459671'},
|
||||
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
|
||||
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
|
||||
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
|
||||
|
||||
),
|
||||
|
||||
( # This isbn not on amazon
|
||||
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
|
||||
'authors':['Lutz']},
|
||||
@ -783,7 +781,7 @@ if __name__ == '__main__': # tests {{{
|
||||
de_tests = [ # {{{
|
||||
(
|
||||
{'identifiers':{'isbn': '3548283519'}},
|
||||
[title_test('Wer Wind sät',
|
||||
[title_test('Wer Wind Sät: Der Fünfte Fall Für Bodenstein Und Kirchhoff',
|
||||
exact=True), authors_test(['Nele Neuhaus'])
|
||||
]
|
||||
|
||||
@ -835,6 +833,6 @@ if __name__ == '__main__': # tests {{{
|
||||
] # }}}
|
||||
|
||||
test_identify_plugin(Amazon.name, com_tests)
|
||||
#test_identify_plugin(Amazon.name, es_tests)
|
||||
#test_identify_plugin(Amazon.name, de_tests)
|
||||
# }}}
|
||||
|
||||
|
@ -196,6 +196,7 @@ class Source(Plugin):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
Plugin.__init__(self, *args, **kwargs)
|
||||
self.running_a_test = False # Set to True when using identify_test()
|
||||
self._isbn_to_identifier_cache = {}
|
||||
self._identifier_to_cover_url_cache = {}
|
||||
self.cache_lock = threading.RLock()
|
||||
@ -284,14 +285,15 @@ class Source(Plugin):
|
||||
|
||||
if authors:
|
||||
# Leave ' in there for Irish names
|
||||
remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
|
||||
replace_pat = re.compile(r'[-+.:;]')
|
||||
remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
|
||||
replace_pat = re.compile(r'[-+.:;,]')
|
||||
if only_first_author:
|
||||
authors = authors[:1]
|
||||
for au in authors:
|
||||
has_comma = ',' in au
|
||||
au = replace_pat.sub(' ', au)
|
||||
parts = au.split()
|
||||
if ',' in au:
|
||||
if has_comma:
|
||||
# au probably in ln, fn form
|
||||
parts = parts[1:] + parts[:1]
|
||||
for tok in parts:
|
||||
|
@ -183,7 +183,11 @@ def test_identify_plugin(name, tests): # {{{
|
||||
rq = Queue()
|
||||
args = (log, rq, abort)
|
||||
start_time = time.time()
|
||||
err = plugin.identify(*args, **kwargs)
|
||||
plugin.running_a_test = True
|
||||
try:
|
||||
err = plugin.identify(*args, **kwargs)
|
||||
finally:
|
||||
plugin.running_a_test = False
|
||||
total_time = time.time() - start_time
|
||||
times.append(total_time)
|
||||
if err is not None:
|
||||
|
@ -138,6 +138,7 @@ class MobiMLizer(object):
|
||||
self.mobimlize_elem(body, stylizer, BlockState(nbody),
|
||||
[FormatState()])
|
||||
item.data = nroot
|
||||
#print etree.tostring(nroot)
|
||||
|
||||
def mobimlize_font(self, ptsize):
|
||||
return self.fnums[self.fmap[ptsize]]
|
||||
@ -233,9 +234,19 @@ class MobiMLizer(object):
|
||||
elif tag in TABLE_TAGS:
|
||||
para.attrib['valign'] = 'top'
|
||||
if istate.ids:
|
||||
last = bstate.body[-1]
|
||||
for id in istate.ids:
|
||||
last.addprevious(etree.Element(XHTML('a'), attrib={'id': id}))
|
||||
for id_ in istate.ids:
|
||||
anchor = etree.Element(XHTML('a'), attrib={'id': id_})
|
||||
if tag == 'li':
|
||||
try:
|
||||
last = bstate.body[-1][-1]
|
||||
except:
|
||||
break
|
||||
last.insert(0, anchor)
|
||||
anchor.tail = last.text
|
||||
last.text = None
|
||||
else:
|
||||
last = bstate.body[-1]
|
||||
last.addprevious(anchor)
|
||||
istate.ids.clear()
|
||||
if not text:
|
||||
return
|
||||
|
@ -66,12 +66,15 @@ class EXTHHeader(object):
|
||||
# last update time
|
||||
pass
|
||||
elif id == 503: # Long title
|
||||
if not title or title == _('Unknown') or \
|
||||
'USER_CONTENT' in title or title.startswith('dtp_'):
|
||||
try:
|
||||
title = content.decode(codec)
|
||||
except:
|
||||
pass
|
||||
# Amazon seems to regard this as the definitive book title
|
||||
# rather than the title from the PDB header. In fact when
|
||||
# sending MOBI files through Amazon's email service if the
|
||||
# title contains non ASCII chars or non filename safe chars
|
||||
# they are messed up in the PDB header
|
||||
try:
|
||||
title = content.decode(codec)
|
||||
except:
|
||||
pass
|
||||
#else:
|
||||
# print 'unknown record', id, repr(content)
|
||||
if title:
|
||||
@ -325,6 +328,10 @@ class MobiReader(object):
|
||||
self.processed_html = self.processed_html.replace('</</', '</')
|
||||
self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><',
|
||||
self.processed_html)
|
||||
# Remove tags of the form <xyz: ...> as they can cause issues further
|
||||
# along the pipeline
|
||||
self.processed_html = re.sub(r'</{0,1}[a-zA-Z]+:\s+[^>]*>', '',
|
||||
self.processed_html)
|
||||
|
||||
for pat in ENCODING_PATS:
|
||||
self.processed_html = pat.sub('', self.processed_html)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user