mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit ce56d9ea72: KG 0.7.45

Changelog.yaml (100 changed lines)
@ -19,6 +19,106 @@
# new recipes:
#   - title:

- version: 0.7.45
  date: 2011-02-11

  new features:
    - title: "Add plugin to download series information from the Kent District Library"

    - title: "Kindle driver: When uploading MOBI files to the device, upload page number information as well (used by the not yet released Kindle 3.1 firmware)"

    - title: "When automatically sending news to device, send to main memory preferentially, if it has enough space."
      tickets: [8877]

    - title: "Allow customization of which metadata fields are searched by default (click the preferences icon next to the search box)"

    - title: "New format TXTZ: a zip file containing the TXT file and associated images + metadata. calibre can convert to and from this format and read/write metadata to it."

    - title: "New option to control how automerge handles duplicate formats when adding books to your calibre library. See Preferences->Adding books"

    - title: "Driver for Nokia X6, Nexus S, WiBook, MyTouch 4G and Huawei Ideos S7"

    - title: "Nicer interface for editing tweaks"

    - title: "Add tweak to remove yellow lines from the edges of the book list"

    - title: "Completion: Restore adding of comma at end after completion for tags type fields. Add a tweak to control if an & is added after completion for author type fields"

    - title: "Turn search as you type off by default for searching the book list. You can turn it on by clicking the preferences button next to the search bar."

    - title: "TXT Input: Add option to remove indents and fix bug where spaces were not retained properly."

  bug fixes:
    - title: "Fix a regression in 0.7.44 that could cause setting authors to fail on windows when the author name is very long"
      tickets: [8797]

    - title: "E-book viewer: Fix bug that could cause the bottom of chapters to get cut off if the topmost element had a large top margin."
      tickets: [8791]

    - title: "Fix regression that caused a spurious error message after moving a library. Also ensure that the entries in the Copy to Library menu are updated after a library is moved/renamed/deleted."
      tickets: [8905]

    - title: "PML Input: New handling of t and T tags. T's that do not start the line are ignored. t's that start and end the line use a margin for the text block"

    - title: "News download: Remove all invalid ASCII control characters from article descriptions as they cause XML parsing to fail"

    - title: "MOBI Output: Fix bug that was discarding non-breaking spaces at the start of a paragraph when they were followed immediately by a tag."
      tickets: [4887]

    - title: "LIT Input: Fix a regression in handling LIT files that contain txt rather than html data"
      tickets: [8904]

    - title: "Fix bug in search box in the plugins dialog"
      tickets: [8882]

    - title: "Fix renaming of categories via the Tag Browser"
      tickets: [8807]

    - title: "Content server: Do not send mobile version to iPad"
      tickets: [8820]

    - title: "Fix undefined publication date appearing in book jacket as 101"
      tickets: [8799]

    - title: "Heuristics: Fix issue with invalid markup from italicize patterns."

    - title: "TXT Input: De-hyphenate textile and markdown input as well. Fix inline TOC not showing all items."

    - title: "RTF Input: More encoding token splitting fixes."

    - title: "Fix regression that broke the convenience Email to xxx entry in the connect/share menu."
      tickets: [8775]

    - title: "Fix editing of series type custom columns in the book list."
      tickets: [8765]

  improved recipes:
    - El periodico de Aragon
    - B92
    - French Belgian news sources

  new recipes:
    - title: "ABC.es"
      author: "Ricardo Jurado"

    - title: "Korespondent and Kopalnia Wiedzy"
      author: "Attis"

    - title: "Radio Prague"
      author: "Francois Pellicaan"

    - title: "Europa Press"
      author: "Luis Hernandez"

    - title: "Interoperability Happens and njuz.net"
      author: "Darko Miletic"

    - title: "Weblogs SL"
      author: "desUBIKado"

    - title: "Kompas and Jakarta Post"
      author: "Adrian Gunawan"

- version: 0.7.44
  date: 2011-02-04
format_docs/pdb/apnx.txt (new file, 69 lines)
@ -0,0 +1,69 @@
APNX
----

apnx files are used by the Amazon Kindle (firmware revision 3.1+) to
map pages from a print book to the Kindle version. Integers within
the file are big-endian.


Layout
------

bytes  content        comments

4      00010001       Format identifier. Value of 65537 big-endian.
4      start of next  The offset after the ending location of the first header.
                      Starts a new sequence of header info
4      length         Length of first header
N      first header   String containing content header
       Starts next sequence
2      unknown        Always 1
2      length         Length of second header
2      page count     Total number of bytes after second header that
                      represent pages. This total includes bytes that
                      are ignored by the pageMap.
2      unknown        Always 32
N      second header  String containing the page mapping header
4*N    padding        The first number given in the page mapping header
                      indicates the number of 0 bytes.
4*N    page list


Content Header
--------------

The content header is a string enclosed in {} containing key, value pairs.

content         comments

contentGuid     Guid.
asin            Amazon identifier for the Kindle version of the book.
cdeType         MOBI cdeType. Should always be EBOK for ebooks.
fileRevisionId  Revision of this file.

Example:
{"contentGuid":"d8c14b0","asin":"B000JML5VM","cdeType":"EBOK","fileRevisionId":"1296874359405"}


Page Mapping Header
-------------------

The page mapping header is a string enclosed in {} containing key, value pairs.

content  comments

asin     The ISBN 10 for the paper book the pages correspond to
pageMap  Three value tuple. Looks like: "(N,N,N)"
         1) Number of bytes after header that starts the page numbering sequence
         2) unknown
         3) unknown

Example:
{"asin":"1906694184","pageMap":"(4,a,1)"}


Page List
---------

The page list is a sequence of offsets in the uncompressed HTML. Each
value is the beginning of a new page. Each entry is a 4 byte big-endian
int. The list is ordered lowest to highest.
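Decoding the layout above is mechanical. The following is a minimal reader sketch (not part of calibre; it assumes a well-formed file, treats the page count field as the number of 4-byte entries, as calibre's writer further down does, and skips padding handling since the padding length depends on the pageMap header):

    import struct

    def read_apnx(path):
        with open(path, 'rb') as f:
            data = f.read()
        # First sequence: identifier, offset of the next sequence, header length
        ident, next_off, clen = struct.unpack('>III', data[0:12])
        assert ident == 0x00010001  # 65537, big-endian
        content_header = data[12:12 + clen]
        # Second sequence: unknown (1), header length, page count, unknown (32)
        unknown1, plen, page_count, unknown2 = struct.unpack(
            '>HHHH', data[next_off:next_off + 8])
        page_header = data[next_off + 8:next_off + 8 + plen]
        # Page list: page_count big-endian 4-byte offsets into the HTML
        start = next_off + 8 + plen
        pages = struct.unpack('>%dI' % page_count,
                              data[start:start + 4 * page_count])
        return content_header, page_header, pages
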
@ -126,12 +126,14 @@ sort_columns_at_startup = None

gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'

#: Control sorting of titles and series in the display
# Control title and series sorting in the library view.
# If set to 'library_order', leading articles such as The and A will be ignored.
# If set to 'strictly_alphabetic', the titles will be sorted without processing.
# For example, with library_order, The Client will sort under 'C'. With
# strictly_alphabetic, the book will sort under 'T'.
#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
# 'library_order', the title sort field will be used instead of the title.
# Unless you have manually edited the title sort field, leading articles such as
# The and A will be ignored. If set to 'strictly_alphabetic', the titles will be
# sorted as-is (sort by title instead of title sort). For example, with
# library_order, The Client will sort under 'C'. With strictly_alphabetic, the
# book will sort under 'T'.
# This flag affects Calibre's library display. It has no effect on devices. In
# addition, titles for books added before changing the flag will retain their
# order until the title is edited. Double-clicking on a title and hitting return

@ -140,11 +142,15 @@ title_series_sorting = 'library_order'

#: Control formatting of title and series when used in templates
# Control how title and series names are formatted when saving to disk/sending
# to device. If set to library_order, leading articles such as The and A will
# be put at the end.
# If set to 'strictly_alphabetic', the titles will be sorted without processing.
# For example, with library_order, "The Client" will become "Client, The". With
# strictly_alphabetic, it would remain "The Client".
# to device. The behavior depends on the field being processed. If processing
# title, then if this tweak is set to 'library_order', the title will be
# replaced with title_sort. If it is set to 'strictly_alphabetic', then the
# title will not be changed. If processing series, then if set to
# 'library_order', articles such as 'The' and 'An' will be moved to the end. If
# set to 'strictly_alphabetic', the series will be sent without change.
# For example, if the tweak is set to library_order, "The Lord of the Rings"
# will become "Lord of the Rings, The". If the tweak is set to
# strictly_alphabetic, it would remain "The Lord of the Rings".
save_template_title_series_sorting = 'library_order'
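The two tweaks are independent of each other; for example, a configuration that ignores leading articles when sorting the library view but leaves titles untouched when saving to disk or sending to a device would be (illustrative values only):

    title_series_sorting = 'library_order'
    save_template_title_series_sorting = 'strictly_alphabetic'
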
#: Set the list of words considered to be "articles" for sort strings
resources/recipes/abc_es.recipe (new file, 68 lines)
@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__author__ = 'Ricardo Jurado'
__copyright__ = 'Ricardo Jurado'
__version__ = 'v0.4'
__date__ = '11 February 2011'

'''
http://www.abc.es/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1296604369(BasicNewsRecipe):

    title = u'ABC.es'
    masthead_url = 'http://www.abc.es/img/logo-abc.gif'
    cover_url = 'http://www.abc.es/img/logo-abc.gif'
    publisher = u'Grupo VOCENTO'

    __author__ = 'Ricardo Jurado'
    description = 'Noticias de Spain y el mundo'
    category = 'News,Spain,National,International,Economy'
    oldest_article = 2
    max_articles_per_feed = 10

    no_stylesheets = True
    use_embedded_content = False
    encoding = 'ISO-8859-1'
    remove_javascript = True
    language = 'es'

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
        h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        """

    keep_only_tags = [
        # dict(name='h2', attrs={'class':['logos']}),
        dict(name='h3', attrs={'class':['overhead']}),
        dict(name='h1', attrs={'class':'headline'}),
        dict(name='h3', attrs={'class':['subhead']}),
        dict(name='div', attrs={'class':'datosi'}),
        dict(name='div', attrs={'class':'photo-alt1'}),
        dict(name='div', attrs={'class':'text'})
        ]

    # remove_tags_before = dict(name='div', attrs={'id':['cabecera2']})

    feeds = [
          (u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml')
         ,(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml')
         ,(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml')
         ,(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml')
         ,(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml')
         ,(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml')
         ,(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml')
         ,(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml')
         ,(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml')
         ,(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml')
         ,(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml')
         ,(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml')
         ,(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml')
         ,(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml')
         ,(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
        ]
@ -5,8 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon'
__version__ = 'v0.05'
__date__ = '07, December 2010'
__version__ = 'v0.07'
__date__ = '06, February 2011'
'''
elperiodicodearagon.com
'''

@ -38,22 +38,26 @@ class elperiodicodearagon(BasicNewsRecipe):
        ,'publisher' : publisher
        }

    feeds = [(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
             (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
             (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
             (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
             (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
             (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
             (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
             (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
             (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
             (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')]
    feeds = [
             (u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
             (u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
             (u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
             (u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
             (u'Deportes', u'http://elperiodicodearagon.com/RSS/7.xml'),
             (u'Real Zaragoza', u'http://elperiodicodearagon.com/RSS/10.xml'),
             (u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
             (u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
             (u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
             (u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
            ]


    extra_css = '''
        h3{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        dd{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
        h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
        h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
        .columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
        img{margin-bottom: 0.4em}
        '''

    remove_attributes = ['height','width']

@ -82,6 +86,7 @@ class elperiodicodearagon(BasicNewsRecipe):
        dict(name='a', attrs={'class':'AvisoComentario'}),
        dict(name='div', attrs={'class':'CajaAvisoComentario'}),
        dict(name='div', attrs={'class':'navegaNoticias'}),
        dict(name='div', attrs={'class':'Mensaje'}),
        dict(name='div', attrs={'id':'PaginadorDiCom'}),
        dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
        dict(name='div', attrs={'id':'CintilloComentario'}),

@ -107,3 +112,15 @@ class elperiodicodearagon(BasicNewsRecipe):
        (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
    ]

    # Replace the embedded YouTube video with an image

    def preprocess_html(self, soup):
        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
            if video_yt:
                video_yt.name = 'img'
                fuente = video_yt['src']
                fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
                video_yt['src'] = fuente2 + '/0.jpg'

        return soup
@ -182,6 +182,10 @@ class NYTimes(BasicNewsRecipe):
                    'mediaOverlay slideshow',
                    'headlinesOnly multiline flush',
                    'wideThumb',
                    'video', #added 02-11-2011
                    'videoHeader',#added 02-11-2011
                    'articleInlineVideoHolder', #added 02-11-2011
                    'assetCompanionAd',
                    re.compile('^subNavigation'),
                    re.compile('^leaderboard'),
                    re.compile('^module'),

@ -664,7 +668,7 @@ class NYTimes(BasicNewsRecipe):

        try:
            #remove "Related content" bar
            runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
            runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']})
            if runAroundsFound:
                for runAround in runAroundsFound:
                    #find all section headers
@ -672,6 +676,12 @@ class NYTimes(BasicNewsRecipe):
                    if hlines:
                        for hline in hlines:
                            hline.extract()

                    #find all section headers
                    hlines = runAround.findAll('h6')
                    if hlines:
                        for hline in hlines:
                            hline.extract()
        except:
            self.log("Error removing related content bar")
resources/recipes/tedneward.recipe (new file, 33 lines)
@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
blogs.tedneward.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class InteroperabilityHappens(BasicNewsRecipe):
    title = 'Interoperability Happens'
    __author__ = 'Darko Miletic'
    description = 'Tech blog by Ted Neward'
    oldest_article = 15
    max_articles_per_feed = 100
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'
    extra_css = """
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
        """

    conversion_options = {
          'comment'  : description
        , 'tags'     : 'blog, technology, microsoft, programming, C#, Java'
        , 'publisher': 'Ted Neward'
        , 'language' : language
        }

    feeds = [(u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss')]
resources/recipes/weblogs_sl.recipe (new file, 104 lines)
@ -0,0 +1,104 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.05'
__date__ = '9, February 2011'
'''
http://www.weblogssl.com/
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class weblogssl(BasicNewsRecipe):
    __author__ = 'desUBIKado'
    description = u'Weblogs colectivos dedicados a seguir la actualidad sobre tecnologia, entretenimiento, estilos de vida, motor, deportes y economia.'
    title = u'Weblogs SL (Xataka, Genbeta, VidaExtra, Blog de Cine y otros)'
    publisher = 'Weblogs SL'
    category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1.5
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # If you do not want to fetch all of the blogs, you can skip any of them by
    # putting a # character in front of it, i.e. # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
    # would stop Applesfera from being downloaded. NOTE: the last feed must not
    # end with a trailing comma.

    feeds = [
             (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
             (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
             (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
             (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
             (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
             (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
             (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
             (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
             (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
             (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
             (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
             (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
             (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
             (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
             (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
             (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
             (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
             (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
             (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
             (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
             (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
             (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
             (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
             (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
             (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
             (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
             (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
             (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
             (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
             (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
             (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
             (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
             (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
             (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
             (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
             (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
             (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
             (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
             (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
             (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
            ]


    keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
                      dict(name='div', attrs={'class':'post'}),
                      dict(name='div', attrs={'id':'blog-comments'})
                     ]

    remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]

    def print_version(self, url):
        return url.replace('http://www.', 'http://m.')

    preprocess_regexps = [
        # Insert a blank line between one comment and the next
        (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
    ]

    # Replace the embedded YouTube video with an image

    def preprocess_html(self, soup):
        for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
            if video_yt:
                video_yt.name = 'img'
                fuente = video_yt['src']
                fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
                fuente3 = fuente2.replace('?rel=0','')
                video_yt['src'] = fuente3 + '/0.jpg'

        return soup
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.44'
__version__ = '0.7.45'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@ -507,7 +507,7 @@ from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK

from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
    LibraryThing
    KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \

@ -517,7 +517,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck

plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
        LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
        NiceBooksCovers]
plugins += [
@ -83,7 +83,7 @@ class ANDROID(USBMS):
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE']
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']

    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT']
src/calibre/devices/kindle/apnx.py (new file, 68 lines)
@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john at nachtimwald.com>'
__docformat__ = 'restructuredtext en'

'''
Generates and writes an APNX page mapping file.
'''

import struct
import uuid

from calibre.ebooks.pdb.header import PdbHeaderReader

class APNXBuilder(object):
    '''
    Currently uses the Adobe 1024 byte count equal one page formula.
    '''

    def write_apnx(self, mobi_file_path, apnx_path):
        with open(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        pages = self.get_pages(text_length)
        apnx = self.generate_apnx(pages)

        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)

    def generate_apnx(self, pages):
        apnx = ''

        content_vals = {
            'guid': str(uuid.uuid4()).replace('-', '')[:8],
            'isbn': '',
        }

        content_header = '{"contentGuid":"%(guid)s","asin":"%(isbn)s","cdeType":"EBOK","fileRevisionId":"1"}' % content_vals
        page_header = '{"asin":"%(isbn)s","pageMap":"(1,a,1)"}' % content_vals

        apnx += struct.pack('>I', 65537)
        apnx += struct.pack('>I', 12 + len(content_header))
        apnx += struct.pack('>I', len(content_header))
        apnx += content_header
        apnx += struct.pack('>H', 1)
        apnx += struct.pack('>H', len(page_header))
        apnx += struct.pack('>H', len(pages))
        apnx += struct.pack('>H', 32)
        apnx += page_header

        # write page values to apnx
        for page in pages:
            apnx += struct.pack('>L', page)

        return apnx

    def get_pages(self, text_length):
        pages = []
        count = 0

        while count < text_length:
            pages.append(count)
            count += 1024

        return pages
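A quick usage sketch (the paths are hypothetical; this mirrors what the Kindle driver below does from upload_cover):

    builder = APNXBuilder()
    builder.write_apnx('/tmp/book.mobi', '/tmp/book.apnx')

With the 1024 byte formula, a book whose uncompressed text length is 300,000 bytes gets 293 page entries, at offsets 0, 1024, 2048, and so on.
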
src/calibre/devices/kindle/bookmark.py (new file, 315 lines)
@ -0,0 +1,315 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'

import os
from cStringIO import StringIO
from struct import unpack

class Bookmark(): # {{{
    '''
    A simple class fetching bookmark data
    Kindle-specific
    '''
    def __init__(self, path, id, book_format, bookmark_extension):
        self.book_format = book_format
        self.bookmark_extension = bookmark_extension
        self.book_length = 0
        self.id = id
        self.last_read = 0
        self.last_read_location = 0
        self.path = path
        self.timestamp = 0
        self.user_notes = None

        self.get_bookmark_data()
        self.get_book_length()
        try:
            self.percent_read = min(float(100*self.last_read / self.book_length),100)
        except:
            self.percent_read = 0

    def record(self, n):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        if n >= self.nrecs:
            raise ValueError('non-existent record %r' % n)
        offoff = 78 + (8 * n)
        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
        stop = None
        if n < (self.nrecs - 1):
            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
        return StreamSlicer(self.stream, start, stop)

    def get_bookmark_data(self):
        ''' Return the timestamp and last_read_location '''
        from calibre.ebooks.metadata.mobi import StreamSlicer
        user_notes = {}
        if self.bookmark_extension == 'mbp':
            MAGIC_MOBI_CONSTANT = 150
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.timestamp, = unpack('>I', data[0x24:0x28])
                bpar_offset, = unpack('>I', data[0x4e:0x52])
                lrlo = bpar_offset + 0x0c
                self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
                self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
                entries, = unpack('>I', data[0x4a:0x4e])

                # Store the annotations/locations
                bpl = bpar_offset + 4
                bpar_len, = unpack('>I', data[bpl:bpl+4])
                bpar_len += 8
                #print "bpar_len: 0x%x" % bpar_len
                eo = bpar_offset + bpar_len

                # Walk bookmark entries
                #print " --- %s --- " % self.path
                current_entry = 1
                sig = data[eo:eo+4]
                previous_block = None

                while sig == 'DATA':
                    text = None
                    entry_type = None
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    if rec_len == 0:
                        current_block = "empty_data"
                    elif data[eo+8:eo+12] == "EBAR":
                        current_block = "data_header"
                        #entry_type = "data_header"
                        location, = unpack('>I', data[eo+0x34:eo+0x38])
                        #print "data_header location: %d" % location
                    else:
                        current_block = "text_block"
                        if previous_block == 'empty_data':
                            entry_type = 'Note'
                        elif previous_block == 'data_header':
                            entry_type = 'Highlight'
                        text = data[eo+8:eo+8+rec_len].decode('utf-16-be')

                    if entry_type:
                        displayed_location = location/MAGIC_MOBI_CONSTANT + 1
                        user_notes[location] = dict(id=self.id,
                                                    displayed_location=displayed_location,
                                                    type=entry_type,
                                                    text=text)

                    eo += rec_len + 8
                    current_entry += 1
                    previous_block = current_block
                    sig = data[eo:eo+4]

                while sig == 'BKMK':
                    # Fix start location for Highlights using BKMK data
                    end_loc, = unpack('>I', data[eo+0x10:eo+0x14])

                    if end_loc in user_notes and \
                       (user_notes[end_loc]['type'] == 'Highlight' or \
                        user_notes[end_loc]['type'] == 'Note'):
                        # Switch location to start (0x08:0x0c)
                        start, = unpack('>I', data[eo+8:eo+12])
                        user_notes[start] = user_notes[end_loc]
                        '''
                        print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'],
                                                                        end_loc,
                                                                        end_loc/MAGIC_MOBI_CONSTANT + 1,
                                                                        start,
                                                                        start//MAGIC_MOBI_CONSTANT + 1)
                        '''
                        user_notes[start]['displayed_location'] = start/MAGIC_MOBI_CONSTANT + 1
                        user_notes.pop(end_loc)
                    else:
                        # If a bookmark coincides with a user annotation, the locs could
                        # be the same - cheat by nudging -1
                        # Skip bookmark for last_read_location
                        if end_loc != self.last_read:
                            # print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1)
                            displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
                            user_notes[end_loc - 1] = dict(id=self.id,
                                                           displayed_location=displayed_location,
                                                           type='Bookmark',
                                                           text=None)
                    rec_len, = unpack('>I', data[eo+4:eo+8])
                    eo += rec_len + 8
                    sig = data[eo:eo+4]

        elif self.bookmark_extension == 'tan':
            from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata

            def get_topaz_highlight(displayed_location):
                # Parse My Clippings.txt for a matching highlight
                # Search looks for book title match, highlight match, and location match
                # Author is not matched
                # This will find the first instance of a clipping only
                book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
                with open(book_fs,'rb') as f2:
                    stream = StringIO(f2.read())
                    mi = get_topaz_metadata(stream)
                my_clippings = self.path
                split = my_clippings.find('documents') + len('documents/')
                my_clippings = my_clippings[:split] + "My Clippings.txt"
                try:
                    with open(my_clippings, 'r') as f2:
                        marker_found = 0
                        text = ''
                        search_str1 = '%s' % (mi.title)
                        search_str2 = '- Highlight Loc. %d' % (displayed_location)
                        for line in f2:
                            if marker_found == 0:
                                if line.startswith(search_str1):
                                    marker_found = 1
                            elif marker_found == 1:
                                if line.startswith(search_str2):
                                    marker_found = 2
                            elif marker_found == 2:
                                if line.startswith('=========='):
                                    break
                                text += line.strip()
                        else:
                            raise Exception('error')
                except:
                    text = '(Unable to extract highlight text from My Clippings.txt)'
                return text

            MAGIC_TOPAZ_CONSTANT = 33.33
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                while current_entry < entries:
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                for location in user_notes:
                    if location == self.last_read:
                        user_notes.pop(location)
                        break

        elif self.bookmark_extension == 'pdr':
            self.timestamp = os.path.getmtime(self.path)
            with open(self.path,'rb') as f:
                stream = StringIO(f.read())
                data = StreamSlicer(stream)
                self.last_read = int(unpack('>I', data[5:9])[0])
                entries, = unpack('>I', data[9:13])
                current_entry = 0
                e_base = 0x0d
                self.pdf_page_offset = 0
                while current_entry < entries:
                    '''
                    location, = unpack('>I', data[e_base+2:e_base+6])
                    text = None
                    text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
                    e_type, = unpack('>B', data[e_base+1])
                    if e_type == 0:
                        e_type = 'Bookmark'
                    elif e_type == 1:
                        e_type = 'Highlight'
                        text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
                    elif e_type == 2:
                        e_type = 'Note'
                        text = data[e_base+0x10:e_base+0x10+text_len]
                    else:
                        e_type = 'Unknown annotation type'

                    if self.book_format in ['tpz','azw1']:
                        displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
                    elif self.book_format == 'pdf':
                        # *** This needs implementation
                        displayed_location = location
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    if text_len == 0xFFFFFFFF:
                        e_base = e_base + 14
                    else:
                        e_base = e_base + 14 + 2 + text_len
                    current_entry += 1
                    '''
                    # Use label as page number
                    pdf_location, = unpack('>I', data[e_base+1:e_base+5])
                    label_len, = unpack('>H', data[e_base+5:e_base+7])
                    location = int(data[e_base+7:e_base+7+label_len])
                    displayed_location = location
                    e_type = 'Bookmark'
                    text = None
                    user_notes[location] = dict(id=self.id,
                                                displayed_location=displayed_location,
                                                type=e_type,
                                                text=text)
                    self.pdf_page_offset = pdf_location - location
                    e_base += (7 + label_len)
                    current_entry += 1

                self.last_read_location = self.last_read - self.pdf_page_offset

        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension
        self.user_notes = user_notes

    def get_book_length(self):
        from calibre.ebooks.metadata.mobi import StreamSlicer
        book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)

        self.book_length = 0
        if self.bookmark_extension == 'mbp':
            # Read the book len from the header
            try:
                with open(book_fs,'rb') as f:
                    self.stream = StringIO(f.read())
                    self.data = StreamSlicer(self.stream)
                    self.nrecs, = unpack('>H', self.data[76:78])
                    record0 = self.record(0)
                    self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
            except:
                pass
        elif self.bookmark_extension == 'tan':
            # Read bookLength from metadata
            from calibre.ebooks.metadata.topaz import MetadataUpdater
            try:
                with open(book_fs,'rb') as f:
                    mu = MetadataUpdater(f)
                    self.book_length = mu.book_length
            except:
                pass
        elif self.bookmark_extension == 'pdr':
            from calibre import plugins
            try:
                self.book_length = plugins['pdfreflow'][0].get_numpages(open(book_fs).read())
            except:
                pass

        else:
            print "unsupported bookmark_extension: %s" % self.bookmark_extension

# }}}
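A hypothetical usage sketch of the class above, reading the annotations out of a Kindle .mbp sidecar file (the path and id value are made up; print statements follow the Python 2 style used throughout this file):

    bm = Bookmark('/mnt/kindle/documents/book.mbp', 3,
                  book_format='mobi', bookmark_extension='mbp')
    print bm.last_read_location, bm.percent_read
    for loc in sorted(bm.user_notes):
        note = bm.user_notes[loc]
        print note['displayed_location'], note['type'], note['text']
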
@ -7,10 +7,11 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Amazon's Kindle
'''
import datetime, os, re, sys, json, hashlib

from cStringIO import StringIO
from struct import unpack

import datetime, os, re, sys, json, hashlib

from calibre.devices.kindle.apnx import APNXBuilder
from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS

'''

@ -170,6 +171,8 @@ class KINDLE2(KINDLE):

    description = _('Communicate with the Kindle 2/3 eBook reader.')

    FORMATS = KINDLE.FORMATS + ['pdf']
    DELETE_EXTS = KINDLE.DELETE_EXTS + ['.apnx']

    PRODUCT_ID = [0x0002, 0x0004]
    BCD = [0x0100]

@ -205,6 +208,23 @@ class KINDLE2(KINDLE):
        if h in path_map:
            book.device_collections = list(sorted(path_map[h]))

    def upload_cover(self, path, filename, metadata, filepath):
        '''
        Hijacking this function to write the apnx file.
        '''
        if not filepath.lower().endswith('.mobi'):
            return

        apnx_path = '%s.apnx' % os.path.join(path, filename)
        apnx_builder = APNXBuilder()
        try:
            apnx_builder.write_apnx(filepath, apnx_path)
        except:
            print 'Failed to generate APNX'
            import traceback
            traceback.print_exc()


class KINDLE_DX(KINDLE2):

    name = 'Kindle DX Device Interface'

@ -214,310 +234,3 @@ class KINDLE_DX(KINDLE2):

    PRODUCT_ID = [0x0003]
    BCD = [0x0100]
    [... 310 removed lines: the Bookmark class, moved verbatim to the new
    src/calibre/devices/kindle/bookmark.py shown above ...]
@ -76,11 +76,11 @@ class E52(USBMS):
    supported_platforms = ['windows', 'linux', 'osx']

    VENDOR_ID = [0x421]
    PRODUCT_ID = [0x1CD]
    PRODUCT_ID = [0x1CD, 0x273]
    BCD = [0x100]


    FORMATS = ['mobi', 'prc']
    FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'txt']

    EBOOK_DIR_MAIN = 'eBooks'
    SUPPORTS_SUB_DIRS = True
@ -216,21 +216,22 @@ class EPUBOutput(OutputFormatPlugin):
        encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

        from calibre.ebooks.epub import initialize_container
        epub = initialize_container(output_path, os.path.basename(opf),
                extra_entries=extra_entries)
        epub.add_dir(tdir)
        if encryption is not None:
            epub.writestr('META-INF/encryption.xml', encryption)
        if metadata_xml is not None:
            epub.writestr('META-INF/metadata.xml',
                    metadata_xml.encode('utf-8'))
        with initialize_container(output_path, os.path.basename(opf),
                extra_entries=extra_entries) as epub:
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', encryption)
            if metadata_xml is not None:
                epub.writestr('META-INF/metadata.xml',
                        metadata_xml.encode('utf-8'))
        if opts.extract_to is not None:
            from calibre.utils.zipfile import ZipFile
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            os.mkdir(opts.extract_to)
            epub.extractall(path=opts.extract_to)
            with ZipFile(output_path) as zf:
                zf.extractall(path=opts.extract_to)
            self.log.info('EPUB extracted to', opts.extract_to)
        epub.close()

    def encrypt_fonts(self, uris, tdir, uuid): # {{{
        from binascii import unhexlify
@ -247,30 +247,24 @@ class Amazon(MetadataSource): # {{{

# }}}

class LibraryThing(MetadataSource): # {{{
class KentDistrictLibrary(MetadataSource): # {{{

    name = 'LibraryThing'
    name = 'Kent District Library'
    metadata_type = 'social'
    description = _('Downloads series/covers/rating information from librarything.com')
    description = _('Downloads series information from ww2.kdl.org')

    def fetch(self):
        if not self.isbn or not self.site_customization:
        if not self.title or not self.book_author:
            return
        from calibre.ebooks.metadata.library_thing import get_social_metadata
        un, _, pw = self.site_customization.partition(':')
        from calibre.ebooks.metadata.kdl import get_series
        try:
            self.results = get_social_metadata(self.title, self.book_author,
                    self.publisher, self.isbn, username=un, password=pw)
            self.results = get_series(self.title, self.book_author)
        except Exception, e:
            import traceback
            traceback.print_exc()
            self.exception = e
            self.tb = traceback.format_exc()

    @property
    def string_customization_help(self):
        ans = _('To use librarything.com you must sign up for a %sfree account%s '
                'and enter your username and password separated by a : below.')
        return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')

# }}}
src/calibre/ebooks/metadata/kdl.py (new file, 79 lines)
@ -0,0 +1,79 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re, urllib, urlparse

from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode

URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="

_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])

def get_series(title, authors):
    mi = Metadata(title, authors)
    if title and title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode):
        title = title.encode('utf-8')

    title = urllib.quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    raw = br.open(url).read()
    if 'see the full results' not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class':'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class':'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi
    href = a['href'].partition('?')[-1]
    data = urlparse.parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series
        ns = ss.nextSibling
        if ns.contents:
            raw = unicode(ns.contents[0])
            raw = raw.partition('.')[0].strip()
            try:
                mi.series_index = int(raw)
            except:
                pass
    return mi


if __name__ == '__main__':
    import sys
    print get_series(sys.argv[-2], [sys.argv[-1]])
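For reference, a quick way to exercise the new module from Python (the output shown is illustrative; it depends on what the KDL catalogue returns for the query):

    from calibre.ebooks.metadata.kdl import get_series

    mi = get_series('The Fellowship of the Ring', ['J. R. R. Tolkien'])
    print mi.series, mi.series_index   # e.g. Lord of the Rings 1

or equivalently from the command line, via the __main__ block above:

    python src/calibre/ebooks/metadata/kdl.py "The Fellowship of the Ring" "J. R. R. Tolkien"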
@ -39,6 +39,13 @@ def asfloat(value):
         return 0.0
     return float(value)

+def isspace(text):
+    if not text:
+        return True
+    if u'\xa0' in text:
+        return False
+    return text.isspace()
+
 class BlockState(object):
     def __init__(self, body):
         self.body = body

@ -438,7 +445,7 @@ class MobiMLizer(object):
         if elem.text:
             if istate.preserve:
                 text = elem.text
-            elif len(elem) > 0 and elem.text.isspace():
+            elif len(elem) > 0 and isspace(elem.text):
                 text = None
             else:
                 text = COLLAPSE.sub(' ', elem.text)

@ -481,7 +488,7 @@ class MobiMLizer(object):
         if child.tail:
             if istate.preserve:
                 tail = child.tail
-            elif bstate.para is None and child.tail.isspace():
+            elif bstate.para is None and isspace(child.tail):
                 tail = None
             else:
                 tail = COLLAPSE.sub(' ', child.tail)
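The new isspace() helper exists because Python 2's unicode.isspace() counts the non-breaking space (U+00A0) as whitespace, so text made up only of NBSPs was collapsed away at the two call sites above. A quick check of the standard behaviour:

    print u'\xa0'.isspace()       # -> True: the built-in treats NBSP as whitespace
    print u'\xa0word'.isspace()   # -> False
    # With the helper, isspace(u'\xa0') is False, so runs of non-breaking
    # spaces survive the whitespace-collapsing pass in MOBI output.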
@ -70,7 +70,7 @@ class PML_HTMLizer(object):
         'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
         'r': ('<div style="text-align: right;">', '</div>'),
         't': ('<div style="margin-left: 5%;">', '</div>'),
-        'T': ('<div style="margin-left: %s;">', '</div>'),
+        'T': ('<div style="text-indent: %s;">', '</div>'),
         'i': ('<span style="font-style: italic;">', '</span>'),
         'u': ('<span style="text-decoration: underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
@ -499,7 +499,13 @@ class PML_HTMLizer(object):
         self.toc = []
         self.file_name = file_name

-        indent_state = {'t': False, 'T': False}
+        # t: Are we in an open \t tag set?
+        # T: Are we in an open \T?
+        # st: Did the \t start the line?
+        # sT: Did the \T start the line?
+        # et: Did the \t end the line?
+        indent_state = {'t': False, 'T': False, 'st': False, 'sT': False, 'et': False}
         basic_indent = False
         adv_indent_val = ''
         # Keep track of the number of empty lines
         # between paragraphs. When we reach a set number
@ -512,8 +518,26 @@ class PML_HTMLizer(object):
         for line in pml.splitlines():
             parsed = []
             empty = True

             basic_indent = indent_state['t']
-            adv_indent = indent_state['T']
-            indent_state['T'] = False
+            # Determine if the \t starts the line or if we are
+            # in an open \t block.
+            if line.lstrip().startswith('\\t') or basic_indent:
+                basic_indent = True
+                indent_state['st'] = True
+            else:
+                indent_state['st'] = False
+            # Determine if the \T starts the line.
+            if line.lstrip().startswith('\\T'):
+                indent_state['sT'] = True
+            else:
+                indent_state['sT'] = False
+            # Determine if the \t ends the line.
+            if line.rstrip().endswith('\\t'):
+                indent_state['et'] = True
+            else:
+                indent_state['et'] = False

             # Must use StringIO, cStringIO does not support unicode
             line = StringIO.StringIO(line)
@ -575,13 +599,10 @@ class PML_HTMLizer(object):
                     empty = False
                     text = '<hr width="%s" />' % self.code_value(line)
                 elif c == 't':
-                    indent_state[c] = not indent_state[c]
-                    if indent_state[c]:
-                        basic_indent = True
+                    indent_state['t'] = not indent_state['t']
                 elif c == 'T':
+                    # Ensure we only store the value on the first T set for the line.
+                    if not indent_state['T']:
-                        adv_indent = True
                         adv_indent_val = self.code_value(line)
+                    else:
+                        # We detected a T previously on this line.
@ -610,10 +631,23 @@ class PML_HTMLizer(object):
                     text = self.end_line()
                 parsed.append(text)

+            # Basic indent will be set if the \t starts the line or
+            # if we are in a continuing \t block.
             if basic_indent:
+                # if the \t started the line and either it ended the line or the \t
+                # block is still open use a left margin.
+                if indent_state['st'] and (indent_state['et'] or indent_state['t']):
                     parsed.insert(0, self.STATES_TAGS['t'][0])
                     parsed.append(self.STATES_TAGS['t'][1])
-            elif adv_indent:
+                # Use a text indent instead of a margin.
+                # This handles cases such as:
+                # \tO\tne upon a time...
+                else:
+                    parsed.insert(0, self.STATES_TAGS['T'][0] % '5%')
+                    parsed.append(self.STATES_TAGS['T'][1])
+            # \t will override \T's on the line.
+            # We only handle \T's that started the line.
+            elif indent_state['T'] and indent_state['sT']:
                 parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
                 parsed.append(self.STATES_TAGS['T'][1])
+                indent_state['T'] = False
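Put together, the new state flags change what \t produces depending on where it opens and closes. A sketch of the intended translations, assumed from the rules above rather than captured from calibre output:

    # \tIndented paragraph\t       (starts and ends the line) -> margin, as before:
    #     <div style="margin-left: 5%;">Indented paragraph</div>
    # \tO\tne upon a time...       (\t wraps only the first letter) -> first-line indent:
    #     <div style="text-indent: 5%;">One upon a time...</div>
    # A \T that does not start its line is now ignored; one that does
    # produces <div style="text-indent: ...;"> with the \T's value.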
@ -237,6 +237,7 @@ class ChooseLibraryAction(InterfaceAction):
             return
         self.stats.rename(location, newloc)
         self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def delete_requested(self, name, location):
         loc = location.replace('/', os.sep)

@ -253,6 +254,7 @@ class ChooseLibraryAction(InterfaceAction):
             pass
         self.stats.remove(location)
         self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def backup_status(self, location):
         dirty_text = 'no'

@ -329,6 +331,7 @@ class ChooseLibraryAction(InterfaceAction):
                 ' libraries.')%loc, show=True)
             self.stats.remove(location)
             self.build_menus()
+            self.gui.iactions['Copy To Library'].build_menus()
             return

         prefs['library_path'] = loc
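The same pair of calls now follows every rename, delete and move, so the Copy to Library menu can no longer go stale. A hypothetical refactor (not part of this commit) would capture the pattern in one helper:

    def _rebuild_library_menus(self):
        # Keep this action's own menu and the Copy To Library
        # menu in sync with the current list of libraries.
        self.build_menus()
        self.gui.iactions['Copy To Library'].build_menus()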
@ -371,9 +374,20 @@ class ChooseLibraryAction(InterfaceAction):
         if not self.change_library_allowed():
             return
         from calibre.gui2.dialogs.choose_library import ChooseLibrary
         self.gui.library_view.save_state()
         db = self.gui.library_view.model().db
-        c = ChooseLibrary(db, self.gui.library_moved, self.gui)
+        location = self.stats.canonicalize_path(db.library_path)
+        self.pre_choose_dialog_location = location
+        c = ChooseLibrary(db, self.choose_library_callback, self.gui)
         c.exec_()
+        self.choose_dialog_library_renamed = getattr(c, 'library_renamed', False)
+
+    def choose_library_callback(self, newloc, copy_structure=False):
+        self.gui.library_moved(newloc, copy_structure=copy_structure)
+        if getattr(self, 'choose_dialog_library_renamed', False):
+            self.stats.rename(self.pre_choose_dialog_location, prefs['library_path'])
+        self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def change_library_allowed(self):
         if os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH', None):
@ -71,6 +71,8 @@ class ChooseLibrary(QDialog, Ui_Dialog):
             prefs['library_path'] = loc
             self.callback(loc, copy_structure=self.copy_structure.isChecked())
         else:
+            self.db.prefs.disable_setting = True
+            self.library_renamed = True
             move_library(self.db.library_path, loc, self.parent(),
                     self.callback)
@ -60,7 +60,8 @@ class Tweak(object): # {{{
         return ans

     def __cmp__(self, other):
-        return cmp(self.is_customized, getattr(other, 'is_customized', False))
+        return -1 * cmp(self.is_customized,
+                getattr(other, 'is_customized', False))

     @property
     def is_customized(self):
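Negating the comparison makes customized tweaks sort before default ones, since True > False. A standalone illustration of the ordering (sketch, Python 2 semantics):

    class T(object):
        def __init__(self, customized):
            self.is_customized = customized
        def __cmp__(self, other):
            # customized entries compare as "smaller", i.e. first
            return -1 * cmp(self.is_customized, other.is_customized)

    ts = [T(False), T(True), T(False)]
    ts.sort()
    print [t.is_customized for t in ts]   # -> [True, False, False]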
@ -111,7 +112,10 @@ class Tweaks(QAbstractListModel): # {{{
         if role == Qt.ToolTipRole:
             tt = _('This tweak has its default value')
             if tweak.is_customized:
-                tt = _('This tweak has been customized')
+                tt = '<p>'+_('This tweak has been customized')
+                tt += '<pre>'
+                for varn, val in tweak.custom_values.iteritems():
+                    tt += '%s = %r\n\n'%(varn, val)
             return tt
         if role == Qt.UserRole:
             return tweak
@ -136,6 +140,7 @@ class Tweaks(QAbstractListModel): # {{{
             pos = self.read_tweak(lines, pos, dl, l)
             pos += 1

+        self.tweaks.sort()
         default_keys = set(dl.iterkeys())
         custom_keys = set(l.iterkeys())
@ -227,8 +232,12 @@ class PluginTweaks(QDialog): # {{{
         self.highlighter = PythonHighlighter(self.edit.document())
         self.l = QVBoxLayout()
         self.setLayout(self.l)
-        self.l.addWidget(QLabel(
-            _('Add/edit tweaks for any custom plugins you have installed.')))
+        self.msg = QLabel(
+            _('Add/edit tweaks for any custom plugins you have installed. '
+                'Documentation for these tweaks should be available '
+                'on the website from where you downloaded the plugins.'))
+        self.msg.setWordWrap(True)
+        self.l.addWidget(self.msg)
         self.l.addWidget(self.edit)
         self.edit.setPlainText(raw)
         self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
@ -440,16 +440,17 @@ class Document(QWebPage): # {{{

     @property
     def height(self):
-        j = self.javascript('document.body.offsetHeight', 'int')
+        # Note that document.body.offsetHeight does not include top and bottom
+        # margins on body and in some cases does not include the top margin on
+        # the first element inside body either. See ticket #8791 for an example
+        # of the latter.
         q = self.mainFrame().contentsSize().height()
-        if q == j:
-            return j
-        if min(j, q) <= 0:
-            return max(j, q)
-        window_height = self.window_height
-        if j == window_height:
-            return j if q < 1.2*j else q
-        return j
+        if q < 0:
+            # Don't know if this is still needed, but it can't hurt
+            j = self.javascript('document.body.offsetHeight', 'int')
+            if j >= 0:
+                q = j
+        return q

     @property
     def width(self):
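The old heuristic comparisons against offsetHeight are gone; the property now trusts QtWebKit's contentsSize() and falls back to JavaScript only when it reports a negative height. Distilled into a plain function (a sketch of the logic above, not calibre code):

    def effective_height(contents_height, offset_height):
        q = contents_height
        if q < 0 and offset_height >= 0:
            # fall back to document.body.offsetHeight
            q = offset_height
        return q

    print effective_height(1200, 900)   # -> 1200 (contentsSize wins)
    print effective_height(-1, 900)     # -> 900  (fallback)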
@ -7,7 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, traceback, cStringIO, re, shutil
-from functools import partial

 from calibre.constants import DEBUG
 from calibre.utils.config import Config, StringConfig, tweaks

@ -142,11 +141,19 @@ class SafeFormat(TemplateFormatter):
 def get_components(template, mi, id, timefmt='%b %Y', length=250,
         sanitize_func=ascii_filename, replace_whitespace=False,
         to_lowercase=False):
-    tsfmt = partial(title_sort, order=tweaks['save_template_title_series_sorting'])

+    tsorder = tweaks['save_template_title_series_sorting']
     format_args = FORMAT_ARGS.copy()
     format_args.update(mi.all_non_none_fields())
     if mi.title:
-        format_args['title'] = tsfmt(mi.title)
+        if tsorder == 'strictly_alphabetic':
+            v = mi.title
+        else:
+            # title_sort might be missing or empty. Check both conditions
+            v = mi.get('title_sort', None)
+            if not v:
+                v = title_sort(mi.title, order=tsorder)
+        format_args['title'] = v
     if mi.authors:
         format_args['authors'] = mi.format_authors()
         format_args['author'] = format_args['authors']

@ -157,7 +164,7 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
         else:
             format_args['tags'] = ''
         if mi.series:
-            format_args['series'] = tsfmt(mi.series)
+            format_args['series'] = title_sort(mi.series, order=tsorder)
             if mi.series_index is not None:
                 format_args['series_index'] = mi.format_series_index()
         else:

@ -176,7 +183,7 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
             cm = custom_metadata[key]
             ## TODO: NEWMETA: should ratings be divided by 2? The standard rating isn't...
             if cm['datatype'] == 'series':
-                format_args[key] = tsfmt(format_args[key])
+                format_args[key] = title_sort(format_args[key], order=tsorder)
                 if key+'_index' in format_args:
                     format_args[key+'_index'] = fmt_sidx(format_args[key+'_index'])
             elif cm['datatype'] == 'datetime':
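Net effect: the save template now honours the save_template_title_series_sorting tweak without going through functools.partial, and prefers an explicit title_sort field when one exists. For example (assuming the usual import location, and expected values taken from the tweak's documented behaviour):

    from calibre.ebooks.metadata import title_sort

    print title_sort('The Great Gatsby', order='library_order')
    # -> 'Great Gatsby, The'  (leading article moved)
    print title_sort('The Great Gatsby', order='strictly_alphabetic')
    # -> 'The Great Gatsby'   (title left untouched)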
@ -561,9 +561,10 @@ format, whether input or output are available in the conversion dialog under the
 Convert Microsoft Word documents
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-|app| does not directly convert .doc files from Microsoft Word. However, in Word, you can save the document
+|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
 as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
-"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well.
+"Save as Web Page, Filtered" option, as this will produce clean HTML that converts well. Note that Word
+produces really messy HTML, so converting it can take a long time; be patient.

 There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
 generating the Table of Contents much simpler. It is called BookCreator and is available for free
File diff suppressed because it is too large
@ -8,11 +8,13 @@ import re, htmlentitydefs
 _ascii_pat = None

 def clean_ascii_chars(txt, charlist=None):
-    'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
+    '''
+    Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
+    This is all control chars except \\t,\\n and \\r
+    '''
     global _ascii_pat
     if _ascii_pat is None:
-        chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
-                + [0x1A, 0x1B]
+        chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
         _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))

     if charlist is None:
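A quick check of the new behaviour (sketch): control characters are stripped, while tab, newline and carriage return survive, which is what keeps XML parsing of article descriptions from failing:

    from calibre.utils.cleantext import clean_ascii_chars

    raw = u'ok\x00\x1a\ttabbed\nline'
    print repr(clean_ascii_chars(raw))   # -> u'ok\ttabbed\nline'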
@ -13,6 +13,7 @@ from calibre.web.feeds.feedparser import parse
 from calibre.utils.logging import default_log
 from calibre import entity_to_unicode, strftime
 from calibre.utils.date import dt_factory, utcnow, local_tz
+from calibre.utils.cleantext import clean_ascii_chars

 class Article(object):

@ -43,7 +44,7 @@ class Article(object):
                 print summary.encode('utf-8')
                 traceback.print_exc()
                 summary = u''
-        self.text_summary = summary
+        self.text_summary = clean_ascii_chars(summary)
         self.author = author
         self.content = content
         self.date = published