Merge from custcol trunk

Charles Haley 2010-05-19 23:37:13 +01:00
commit 39aa8b03a6
21 changed files with 524 additions and 48 deletions

Binary image file added (682 B; content not shown).

Binary image file added (682 B; content not shown).

View File

@@ -9,6 +9,7 @@ class BillOReilly(BasicNewsRecipe):
title = u"Bill O'Reilly"
__author__ = 'Rob Lammert - rob.lammert[at]gmail.com'
description = u"Articles from Bill O'Reilly's website and his Fox New's website"
language = 'en'
oldest_article = 7.0
max_articles_per_feed = 100
recursions = 0

View File

@@ -31,7 +31,7 @@ class DigitalSpyUK(BasicNewsRecipe):
remove_tags = [dict(name=['link'])]
remove_attributes = ['height','width']
keep_only_tags = [dict(name='div',attrs={'id':'article'})]
keep_only_tags = [dict(name='div',attrs={'id':'content'})]
feeds = [
(u'News' , u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml' )

View File

@@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
emg.rs/en/news
'''
from calibre.web.feeds.news import BasicNewsRecipe
class emportal_en(BasicNewsRecipe):
title = 'Ekonom:east News'
__author__ = 'Darko Miletic'
description = 'Daily business news from Serbia.'
publisher = 'Ekonom:east Media Group'
category = 'Business, SEE, Serbia, Belgrade, news, Ekonomist, EMportal'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
masthead_url = 'http://www.emg.rs/img/emportal-rss.png'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(attrs={'class':['text-share']})]
keep_only_tags = [dict(attrs={'class':'text'})]
remove_tags_after = dict(attrs={'class':'text-share'})
remove_attributes = ['width','height']
feeds = [(u'Serbia' , u'http://www.emg.rs/en/news/serbia/rss.xml')]
def print_version(self, url):
return url.replace('.html','.print.html')
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
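
The print_version hook above maps article URLs onto the site's printer-friendly pages with a plain suffix swap; a quick illustration of the transformation (the article URL is an invented example following the site's pattern):

    >>> u'http://www.emg.rs/en/news/serbia/123456.html'.replace('.html', '.print.html')
    u'http://www.emg.rs/en/news/serbia/123456.print.html'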

View File

@@ -0,0 +1,51 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
emg.rs/vesti
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class emportal_rs(BasicNewsRecipe):
title = 'Ekonom:east Vesti'
__author__ = 'Darko Miletic'
description = 'Vasa dnevna doza poslovnih informacija iz Srbije, regiona i sveta. Vesti, Berze, Dogadaji, Casopisi.'
publisher = 'Ekonom:east Media Group'
category = 'Ekonom:east Media Group, Ekonomist, Budelar, Bankar, EMportal, Preduzeca, Moja Posla, EU praktikum, ekonomija, Srbija, Beograd, investicije, finansije, energetika, berza'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'sr'
remove_empty_feeds = True
masthead_url = 'http://www.emg.rs/img/emportal-rss.png'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial,Helvetica,sans1,sans-serif } .article_description{font-family: sans1, sans-serif} '
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(attrs={'class':['text-share']})]
keep_only_tags = [dict(attrs={'class':'text'})]
remove_tags_after = dict(attrs={'class':'text-share'})
remove_attributes = ['width','height']
feeds = [
(u'Srbija' , u'http://www.emg.rs/vesti/srbija/rss.xml')
,(u'Region' , u'http://www.emg.rs/vesti/region/rss.xml')
,(u'Svet' , u'http://www.emg.rs/vesti/svet/rss.xml' )
]
def print_version(self, url):
return url.replace('.html','.print.html')
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@@ -7,14 +7,14 @@ description = 'Italian daily newspaper - v1.01 (04, January 2010)'
'''
http://www.messaggero.it/
'''
import time
from calibre.web.feeds.news import BasicNewsRecipe
class IlMessaggero(BasicNewsRecipe):
__author__ = 'Gabriele Marini'
description = 'Italian News'
cover_url = 'http://www.ilmessaggero.it/img_tst/logomsgr.gif'
# cover_url = 'http://www.ilmessaggero.it/img_tst/logomsgr.gif'
title = u'Il Messaggero'
publisher = 'Caltagirone Editore'
category = 'News, politics, culture, economy, general interest'
@@ -34,6 +34,21 @@ class IlMessaggero(BasicNewsRecipe):
dict(name='h2', attrs={'class':'sottotitLettura'}),
dict(name='span', attrs={'class':'testoArticoloG'})
]
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
cover='http://carta.ilmessaggero.it/' + year + month + day + '/jpeg/MSGR_20_CITTA_1.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
self.log("\nCover unavailable")
cover = 'http://www.ilmessaggero.it/img_tst/logomsgr.gif'
return cover
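
get_cover_url above builds a date-stamped URL for the scanned front page and falls back to the site logo when the fetch fails. A worked example of the URL construction, using an arbitrarily chosen date:

    >>> import time
    >>> st = time.strptime('2010-05-19', '%Y-%m-%d')
    >>> 'http://carta.ilmessaggero.it/' + str(st.tm_year) + "%.2d" % st.tm_mon + "%.2d" % st.tm_mday + '/jpeg/MSGR_20_CITTA_1.jpg'
    'http://carta.ilmessaggero.it/20100519/jpeg/MSGR_20_CITTA_1.jpg'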

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Gabriele Marini, based on Darko Miletic'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__description__ = 'La Stampa 05/05/2010'
'''
http://www.lastampa.it/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LaStampa(BasicNewsRecipe):
title = u'La Stampa'
language = 'it'
__author__ = 'Gabriele Marini'
oldest_article = 15
max_articles_per_feed = 50
recursion = 100
cover_url = 'http://www.lastampa.it/edicola/PDF/1.pdf'
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
conversion_options = {'linearize_tables':True}
remove_attributes = ['colspan']
extra_css = ' .boxocchiello2{font-size: small} .catenaccio{font-style: italic} .titoloRub{font-size: xx-large; font-weight: bold } .titologir{font-size: xx-large; font-weight: bold } .sezione{font-weight: bold} '
def get_article_url(self, article):
link = article.get('links')
if link:
return link[0]['href']
keep_only_tags = [dict(attrs={'class':['boxocchiello2','titoloRub','titologir','catenaccio','sezione','articologirata']}),
dict(name='div', attrs={'id':'corpoarticolo'})
]
remove_tags = [dict(name='div', attrs={'id':'menutop'}),
dict(name='div', attrs={'id':'fwnetblocco'}),
dict(name='table', attrs={'id':'strumenti'}),
dict(name='table', attrs={'id':'imgesterna'}),
dict(name='a', attrs={'class':'linkblu'}),
dict(name='a', attrs={'class':'link'}),
dict(name='span', attrs={'class':['boxocchiello','boxocchiello2','sezione']})
]
feeds = [
(u'Home', u'http://www.lastampa.it/redazione/rss_home.xml'),
(u'Editoriali', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=25'),
(u'Politica', u'http://www.lastampa.it/redazione/cmssezioni/politica/rss_politica.xml'),
(u'ArciItaliana', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=14'),
(u'Cronache', u'http://www.lastampa.it/redazione/cmssezioni/cronache/rss_cronache.xml'),
(u'Esteri', u'http://www.lastampa.it/redazione/cmssezioni/esteri/rss_esteri.xml'),
(u'Danni Collaterali', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=90'),
(u'Economia', u'http://www.lastampa.it/redazione/cmssezioni/economia/rss_economia.xml'),
(u'Tecnologia ', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=30'),
(u'Spettacoli', u'http://www.lastampa.it/redazione/cmssezioni/spettacoli/rss_spettacoli.xml'),
(u'Sport', u'http://www.lastampa.it/sport/rss_home.xml'),
(u'Torino', u'http://rss.feedsportal.com/c/32418/f/466938/index.rss'),
(u'Motori', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=57'),
(u'Scienza', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=38'),
(u'Fotografia', u'http://rss.feedsportal.com/c/32418/f/478449/index.rss'),
(u'Scuola', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=60'),
(u'Tempo Libero', u'http://www.lastampa.it/tempolibero/rss_home.xml')
]

View File

@@ -7,14 +7,14 @@ description = 'Italian daily newspaper - v1.00 05-05-2010'
'''
http://www.leggo.it
'''
import time
from calibre.web.feeds.news import BasicNewsRecipe
class LeggoIT(BasicNewsRecipe):
__author__ = 'Gabriele Marini'
description = 'Italian Free daily newspaper'
cover_url = 'http://www.leggo.it/img/logo-leggo2.gif'
# cover_url = 'http://www.leggo.it/img/logo-leggo2.gif'
title = u'Leggo.it'
publisher = 'Ced Caltagirone Editore S.p.A.'
category = 'News, politics, culture, economy, general interest'
@@ -46,4 +46,20 @@ class LeggoIT(BasicNewsRecipe):
(u'Salute', u'http://www.leggo.it/rss/salute.xml'),
(u'Scienza', u'http://www.leggo.it/rss/scienza.xml')
]
def get_cover_url(self):
cover = None
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
cover='http://www.leggo.it/'+ year + month + day + '/jpeg/LEGGO_ROMA_1.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
self.log("\nCover unavailable")
cover = 'http://www.leggo.it/img/logo-leggo2.gif'
return cover

View File

@@ -1,4 +1,4 @@
from calibre.wb.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class PolitiFactCom(BasicNewsRecipe):
title = u'Politifact'

View File

@@ -8,6 +8,7 @@ class SeanHannity(BasicNewsRecipe):
__author__ = 'Rob Lammert - rob.lammert[at]gmail.com'
description = u"Articles from Sean Hannity's website, www.hannity.com"
oldest_article = 7.0
language = 'en'
max_articles_per_feed = 100
recursions = 0
encoding = 'utf8'

View File

@@ -442,7 +442,6 @@ from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK
from calibre.devices.prs500.driver import PRS500
from calibre.devices.prs505.driver import PRS505, PRS700
from calibre.devices.android.driver import ANDROID, S60
from calibre.devices.nokia.driver import N770, N810
@@ -512,7 +511,6 @@ plugins += [
NOOK,
PRS505,
PRS700,
PRS500,
ANDROID,
S60,
N770,

View File

@@ -27,6 +27,34 @@ def strftime(epoch, zone=time.gmtime):
src[2] = INVERSE_MONTH_MAP[int(src[2])]
return ' '.join(src)
def get_connected_device():
from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner
dev = None
scanner = DeviceScanner()
scanner.scan()
connected_devices = []
for d in device_plugins():
ok, det = scanner.is_device_connected(d)
if ok:
dev = d
dev.reset(log_packets=False, detected_device=det)
connected_devices.append(dev)
if dev is None:
print >>sys.stderr, 'Unable to find a connected ebook reader.'
return
for d in connected_devices:
try:
d.open()
except:
continue
else:
dev = d
break
return dev
def debug(ioreg_to_tmp=False, buf=None):
from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner, win_pnp_drives
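
A short usage sketch for the new get_connected_device() helper (assuming it is importable from calibre.devices, alongside the debug() helper shown here; books() is the standard driver call that appears in the BookList diffs below). This is an illustrative sketch, not guaranteed for every driver:

    import sys
    from calibre.devices import get_connected_device  # assumed import path

    dev = get_connected_device()       # scans, resets and opens the device
    if dev is None:
        sys.exit(1)                    # helper already printed an error message
    booklist = dev.books()             # BookList from the device's main memory
    print len(booklist), 'books found on the connected reader'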

View File

@@ -418,3 +418,16 @@ class BookList(list):
'''
raise NotImplementedError()
def get_collections(self, collection_attributes):
'''
Return a dictionary of collections created from collection_attributes.
Each entry in the dictionary is of the form collection name:[list of
books]
The list of books is sorted by book title, except for collections
created from series, in which case series_index is used.
:param collection_attributes: A list of attributes of the Book object
'''
raise NotImplementedError()
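
Illustratively, for a conforming implementation: if two books share the tag u'History' and a third belongs to the series u'Foundation', the call would return a mapping shaped like this (book names invented for the example):

    booklist.get_collections(['tags', 'series'])
    # -> {u'History':    [<book A>, <book B>],   # sorted by title
    #     u'Foundation': [<book C>]}             # sorted by series_index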

View File

@@ -20,7 +20,7 @@ class PALMPRE(USBMS):
FORMATS = ['mobi', 'prc', 'pdb', 'txt']
VENDOR_ID = [0x0830]
PRODUCT_ID = [0x8004, 0x8002]
PRODUCT_ID = [0x8004, 0x8002, 0x0101]
BCD = [0x0316]
VENDOR_NAME = 'PALM'

View File

@@ -71,7 +71,7 @@ class PRS505(USBMS):
return fname
def initialize_XML_cache(self):
paths = {}
paths, prefixes = {}, {}
for prefix, path, source_id in [
('main', MEDIA_XML, 0),
('card_a', CACHE_XML, 1),
@@ -80,10 +80,11 @@ class PRS505(USBMS):
prefix = getattr(self, '_%s_prefix'%prefix)
if prefix is not None and os.path.exists(prefix):
paths[source_id] = os.path.join(prefix, *(path.split('/')))
prefixes[source_id] = prefix
d = os.path.dirname(paths[source_id])
if not os.path.exists(d):
os.makedirs(d)
return XMLCache(paths)
return XMLCache(paths, prefixes)
def books(self, oncard=None, end_session=True):
bl = USBMS.books(self, oncard=oncard, end_session=end_session)
@@ -95,11 +96,17 @@ class PRS505(USBMS):
c = self.initialize_XML_cache()
blists = {}
for i in c.paths:
if booklists[i] is not None:
blists[i] = booklists[i]
c.update(blists)
opts = self.settings()
collections = ['series', 'tags']
if opts.extra_customization:
collections = opts.extra_customization.split(',')
c.update(blists, collections)
c.write()
USBMS.sync_booklists(self, booklists, end_session)
USBMS.sync_booklists(self, booklists, end_session=end_session)
class PRS700(PRS505):
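
With this change the attributes used to build collections default to ['series', 'tags'] but can be overridden through the device's extra_customization setting; the value is split verbatim on commas, as a quick sketch shows (setting value invented for the example):

    opts_extra = 'series,tags,publisher'    # hypothetical user setting
    collections = opts_extra.split(',')     # exactly as sync_booklists does above
    # -> ['series', 'tags', 'publisher']    # note: no whitespace stripping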

View File

@@ -5,30 +5,68 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
import os, time
from pprint import pprint
from base64 import b64decode
from uuid import uuid4
from lxml import etree
from calibre import prints
from calibre import prints, guess_type
from calibre.devices.errors import DeviceError
from calibre.constants import DEBUG
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import string_to_authors
from calibre.ebooks.metadata import string_to_authors, authors_to_string
# Utility functions {{{
EMPTY_CARD_CACHE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1">
</cache>
'''
MIME_MAP = {
"lrf" : "application/x-sony-bbeb",
'lrx' : 'application/x-sony-bbeb',
"rtf" : "application/rtf",
"pdf" : "application/pdf",
"txt" : "text/plain" ,
'epub': 'application/epub+zip',
}
DAY_MAP = dict(Sun=0, Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6)
MONTH_MAP = dict(Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12)
INVERSE_DAY_MAP = dict(zip(DAY_MAP.values(), DAY_MAP.keys()))
INVERSE_MONTH_MAP = dict(zip(MONTH_MAP.values(), MONTH_MAP.keys()))
def strptime(src):
src = src.strip()
src = src.split()
src[0] = str(DAY_MAP[src[0][:-1]])+','
src[2] = str(MONTH_MAP[src[2]])
return time.strptime(' '.join(src), '%w, %d %m %Y %H:%M:%S %Z')
def strftime(epoch, zone=time.gmtime):
src = time.strftime("%w, %d %m %Y %H:%M:%S GMT", zone(epoch)).split()
src[0] = INVERSE_DAY_MAP[int(src[0][:-1])]+','
src[2] = INVERSE_MONTH_MAP[int(src[2])]
return ' '.join(src)
def uuid():
return str(uuid4()).replace('-', '', 1).upper()
# }}}
class XMLCache(object):
def __init__(self, paths):
def __init__(self, paths, prefixes):
if DEBUG:
prints('Building XMLCache...')
pprint(paths)
self.paths = paths
self.prefixes = prefixes
# Parse XML files {{{
parser = etree.XMLParser(recover=True)
self.roots = {}
for source_id, path in paths.items():
@@ -47,10 +85,13 @@ class XMLCache(object):
raw, strip_encoding_pats=True, assume_utf8=True,
verbose=DEBUG)[0],
parser=parser)
# }}}
recs = self.roots[0].xpath('//*[local-name()="records"]')
if not recs:
raise DeviceError('The SONY XML database is corrupted (no <records>)')
raise DeviceError('The SONY XML database is corrupted (no'
' <records>). Try disconnecting and reconnecting'
' your reader.')
self.record_roots = {}
self.record_roots.update(self.roots)
self.record_roots[0] = recs[0]
@@ -75,18 +116,69 @@ class XMLCache(object):
for i, root in self.record_roots.items():
self.purge_broken_playlist_items(root)
for playlist in root.xpath('//*[local-name()="playlist"]'):
if len(playlist) == 0:
if len(playlist) == 0 or not playlist.get('title', None):
if DEBUG:
prints('Removing playlist:', playlist.get('id', None))
prints('Removing playlist id:', playlist.get('id', None),
playlist.get('title', None))
playlist.getparent().remove(playlist)
def ensure_unique_playlist_titles(self):
for i, root in self.record_roots.items():
seen = set([])
for playlist in root.xpath('//*[local-name()="playlist"]'):
title = playlist.get('title', None)
if title is None:
title = _('Unnamed')
playlist.set('title', title)
if title in seen:
for i in range(2, 1000):
if title+str(i) not in seen:
title = title+str(i)
playlist.set('title', title)
break
else:
seen.add(title)
def get_playlist_map(self):
ans = {}
self.ensure_unique_playlist_titles()
self.prune_empty_playlists()
for i, root in self.record_roots.items():
ans[i] = {}
for playlist in root.xpath('//*[local-name()="playlist"]'):
items = []
for item in playlist:
id_ = item.get('id', None)
records = root.xpath(
'//*[local-name()="text" and @id="%s"]'%id_)
if records:
items.append(records[0])
ans[i] = {playlist.get('title'):items}
return ans
def get_or_create_playlist(self, bl_idx, title):
root = self.record_roots[bl_idx]
for playlist in root.xpath('//*[local-name()="playlist"]'):
if playlist.get('title', None) == title:
return playlist
if DEBUG:
prints('Creating playlist:', title)
ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
nsmap=root.nsmap, attrib={
'uuid' : uuid(),
'title': title,
'id' : str(self.max_id(root)+1),
'sourceid': '1'
})
root.append(ans)
return ans
# }}}
def fix_ids(self): # {{{
def ensure_numeric_ids(root):
idmap = {}
for x in root.xpath('//*[@id]'):
for x in root.xpath('child::*[@id]'):
id_ = x.get('id')
try:
id_ = int(id_)
@@ -119,7 +211,9 @@ class XMLCache(object):
for item in root.xpath('//*[@sourceid]'):
sid = pl_sourceid if item.tag.endswith('playlist') else sourceid
item.set('sourceid', str(sid))
items = root.xpath('//*[@id]')
# Only rebase ids of nodes that are immediate children of the
# record root (that way playlist/item nodes are unaffected)
items = root.xpath('child::*[@id]')
items.sort(cmp=lambda x,y:cmp(int(x.get('id')), int(y.get('id'))))
idmap = {}
for i, item in enumerate(items):
@@ -127,13 +221,13 @@ class XMLCache(object):
new = base + i
if old != new:
item.set('id', str(new))
idmap[old] = str(new)
idmap[str(old)] = str(new)
return idmap
self.prune_empty_playlists()
for i in sorted(self.roots.keys()):
root = self.roots[i]
root = self.record_roots[i]
if i == 0:
ensure_media_xml_base_ids(root)
@@ -157,7 +251,8 @@ class XMLCache(object):
self.roots[0].set('nextID', str(max_id+1))
# }}}
def update_booklist(self, bl, bl_index): # {{{
# Update JSON from XML {{{
def update_booklist(self, bl, bl_index):
if bl_index not in self.record_roots:
return
root = self.record_roots[bl_index]
@@ -189,17 +284,129 @@ class XMLCache(object):
break
# }}}
def update(self, booklists):
pass
# Update XML from JSON {{{
def update(self, booklists, collections_attributes):
playlist_map = self.get_playlist_map()
for i, booklist in booklists.items():
if DEBUG:
prints('Updating booklist:', i)
root = self.record_roots[i]
for book in booklist:
path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
record = self.book_by_lpath(book.lpath, root)
if record is None:
record = self.create_text_record(root, i, book.lpath)
self.update_text_record(record, book, path, i)
bl_pmap = playlist_map[i]
self.update_playlists(i, root, booklist, bl_pmap,
collections_attributes)
self.fix_ids()
def update_playlists(self, bl_index, root, booklist, playlist_map,
collections_attributes):
collections = booklist.get_collections(collections_attributes)
for category, books in collections.items():
records = [self.book_by_lpath(b.lpath, root) for b in books]
# Remove any books that were not found, although this
# *should* never happen
if DEBUG and None in records:
prints('WARNING: Some elements in the JSON cache were not '
'found in the XML cache')
records = [x for x in records if x is not None]
for rec in records:
if rec.get('id', None) is None:
rec.set('id', str(self.max_id(root)+1))
ids = [x.get('id', None) for x in records]
if None in ids:
if DEBUG:
prints('WARNING: Some <text> elements do not have ids')
ids = [x for x in ids if x is not None]
playlist = self.get_or_create_playlist(bl_index, category)
playlist_ids = []
for item in playlist:
id_ = item.get('id', None)
if id_ is not None:
playlist_ids.append(id_)
for item in list(playlist):
playlist.remove(item)
extra_ids = [x for x in playlist_ids if x not in ids]
for id_ in ids + extra_ids:
item = playlist.makeelement(
'{%s}item'%self.namespaces[bl_index],
nsmap=playlist.nsmap, attrib={'id':id_})
playlist.append(item)
def create_text_record(self, root, bl_id, lpath):
namespace = self.namespaces[bl_id]
id_ = self.max_id(root)+1
attrib = {
'page':'0', 'part':'0','pageOffset':'0','scale':'0',
'id':str(id_), 'sourceid':'1', 'path':lpath}
ans = root.makeelement('{%s}text'%namespace, attrib=attrib, nsmap=root.nsmap)
root.append(ans)
return ans
def update_text_record(self, record, book, path, bl_index):
timestamp = os.path.getctime(path)
date = strftime(timestamp)
if date != record.get('date', None):
if DEBUG:
prints('Changing date of', path, 'from',
record.get('date', ''), 'to', date)
prints('\tctime', strftime(os.path.getctime(path)))
prints('\tmtime', strftime(os.path.getmtime(path)))
record.set('date', date)
record.set('size', str(os.stat(path).st_size))
record.set('title', book.title)
record.set('author', authors_to_string(book.authors))
ext = os.path.splitext(path)[1]
if ext:
ext = ext[1:].lower()
mime = MIME_MAP.get(ext, None)
if mime is None:
mime = guess_type('a.'+ext)[0]
if mime is not None:
record.set('mime', mime)
if 'sourceid' not in record.attrib:
record.set('sourceid', '1')
if 'id' not in record.attrib:
num = self.max_id(record.getroottree().getroot())
record.set('id', str(num+1))
# }}}
# Writing the XML files {{{
def cleanup_whitespace(self, bl_index):
root = self.record_roots[bl_index]
level = 2 if bl_index == 0 else 1
if len(root) > 0:
root.text = '\n'+'\t'*level
for child in root:
child.tail = '\n'+'\t'*level
if len(child) > 0:
child.text = '\n'+'\t'*(level+1)
for gc in child:
gc.tail = '\n'+'\t'*(level+1)
child.iterchildren(reversed=True).next().tail = '\n'+'\t'*level
root.iterchildren(reversed=True).next().tail = '\n'+'\t'*(level-1)
def write(self):
return
for i, path in self.paths.items():
raw = etree.tostring(self.roots[i], encoding='utf-8',
self.cleanup_whitespace(i)
raw = etree.tostring(self.roots[i], encoding='UTF-8',
xml_declaration=True)
raw = raw.replace("<?xml version='1.0' encoding='UTF-8'?>",
'<?xml version="1.0" encoding="UTF-8"?>')
with open(path, 'wb') as f:
f.write(raw)
# }}}
# Utility methods {{{
def book_by_lpath(self, lpath, root):
matches = root.xpath(u'//*[local-name()="text" and @path="%s"]'%lpath)
if matches:
@@ -245,4 +452,5 @@ class XMLCache(object):
pprint(self.nsmaps)
prints('Found namespaces:')
pprint(self.namespaces)
# }}}
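
ensure_unique_playlist_titles above resolves title clashes by appending the first free numeric suffix; the core of that logic as a standalone sketch (here the deduplicated name is also recorded in seen, which the committed loop only does for unseen titles):

    def dedup_title(title, seen):
        # Append the first suffix in 2..999 that is not already taken.
        if title in seen:
            for i in range(2, 1000):
                if title + str(i) not in seen:
                    title = title + str(i)
                    break
        seen.add(title)
        return title

    seen = set()
    print dedup_title(u'News', seen)   # -> News
    print dedup_title(u'News', seen)   # -> News2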

View File

@@ -4,9 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import re
import time
import os, re, time, sys
from calibre.ebooks.metadata import MetaInformation
from calibre.devices.mime import mime_type_ext
@@ -110,6 +108,9 @@ class Book(MetaInformation):
if isbytestring(val):
enc = filesystem_encoding if attr == 'lpath' else preferred_encoding
val = val.decode(enc, 'replace')
elif isinstance(val, (list, tuple)):
val = [x.decode(preferred_encoding, 'replace') if
isbytestring(x) else x for x in val]
json[attr] = val
return json
@@ -129,3 +130,34 @@ class BookList(_BookList):
def remove_book(self, book):
self.remove(book)
def get_collections(self, collection_attributes):
collections = {}
series_categories = set([])
for attr in collection_attributes:
for book in self:
val = getattr(book, attr, None)
if not val: continue
if isbytestring(val):
val = val.decode(preferred_encoding, 'replace')
if isinstance(val, (list, tuple)):
val = list(val)
elif isinstance(val, unicode):
val = [val]
for category in val:
if category not in collections:
collections[category] = []
collections[category].append(book)
if attr == 'series':
series_categories.add(category)
for category, books in collections.items():
def tgetter(x):
return getattr(x, 'title_sort', 'zzzz')
books.sort(cmp=lambda x,y:cmp(tgetter(x), tgetter(y)))
if category in series_categories:
# Ensures books are sub-sorted by title
def getter(x):
return getattr(x, 'series_index', sys.maxint)
books.sort(cmp=lambda x,y:cmp(getter(x), getter(y)))
return collections
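
For example, with two invented books, one tagged u'News' and one in the series u'Foundation', the new method would group them as follows:

    # book1.tags = [u'News'];  book2.series = u'Foundation'
    # booklist.get_collections(['tags', 'series'])
    #   -> {u'News': [book1], u'Foundation': [book2]}
    # series-derived entries are additionally ordered by series_index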

View File

@@ -98,11 +98,10 @@ class USBMS(CLI, Device):
import traceback
traceback.print_exc()
return changed
if isinstance(ebook_dirs, basestring):
ebook_dirs = [ebook_dirs]
for ebook_dir in ebook_dirs:
ebook_dir = self.path_to_unicode(filesystem_encoding)
ebook_dir = self.path_to_unicode(ebook_dir)
ebook_dir = self.normalize_path( \
os.path.join(prefix, *(ebook_dir.split('/'))) \
if ebook_dir else prefix)

View File

@@ -78,18 +78,12 @@ class KindleDX(Kindle):
name = 'Kindle DX'
id = 'kindledx'
class Sony500(Device):
class Sony505(Device):
output_profile = 'sony'
name = 'SONY PRS 500'
output_format = 'LRF'
manufacturer = 'SONY'
id = 'prs500'
class Sony505(Sony500):
name = 'SONY Reader 6" and Touch Editions'
output_format = 'EPUB'
name = 'SONY Reader 6" and Touch Edition'
manufacturer = 'SONY'
id = 'prs505'
class Kobo(Device):

View File

@@ -106,6 +106,7 @@ _extra_lang_codes = {
'en_SG' : _('English (Singapore)'),
'en_YE' : _('English (Yemen)'),
'en_IE' : _('English (Ireland)'),
'en_CN' : _('English (China)'),
'es_PY' : _('Spanish (Paraguay)'),
'de_AT' : _('German (AT)'),
'nl' : _('Dutch (NL)'),