KG 0.7.45

This commit is contained in:
GRiker 2011-02-11 14:53:40 -07:00
commit ce56d9ea72
90 changed files with 83002 additions and 65469 deletions

View File

@ -19,6 +19,106 @@
# new recipes:
#   - title:

- version: 0.7.45
  date: 2011-02-11

  new features:
    - title: "Add plugin to download series information from the Kent District Library"

    - title: "Kindle driver: When uploading MOBI files to the device, upload page number information as well (used by the not yet released Kindle 3.1 firmware)"

    - title: "When automatically sending news to the device, send to main memory preferentially, if it has enough space."
      tickets: [8877]

    - title: "Allow customization of which metadata fields are searched by default (click the preferences icon next to the search box)"

    - title: "New format TXTZ: a zip file containing the TXT file and associated images + metadata. calibre can convert to and from this format and read/write metadata to it."

    - title: "New option to control how automerge handles duplicate formats when adding books to your calibre library. See Preferences->Adding books"

    - title: "Driver for Nokia X6, Nexus S, WiBook, MyTouch 4G and Huawei Ideos S7"

    - title: "Nicer interface for editing tweaks"

    - title: "Add tweak to remove yellow lines from edges of the book list"

    - title: "Completion: Restore adding a comma at the end after completion for tags type fields. Add a tweak to control whether an & is added after completion for author type fields"

    - title: "Turn search as you type off by default for searching the book list. You can turn it on by clicking the preferences button next to the search bar."

    - title: "TXT Input: Add option to remove indents and fix bug where spaces were not retained properly."

  bug fixes:
    - title: "Fix a regression in 0.7.44 that could cause setting authors to fail on windows when the author name is very long"
      tickets: [8797]

    - title: "E-book viewer: Fix bug that could cause the bottom of chapters to get cut off if the topmost element had a large top margin."
      tickets: [8791]

    - title: "Fix regression that caused a spurious error message after moving a library. Also ensure that the entries in the Copy to Library menu are updated after a library is moved/renamed/deleted."
      tickets: [8905]

    - title: "PML Input: New handling of t and T tags. T's that do not start the line are ignored. t's that start and end the line use a margin for the text block"

    - title: "News download: Remove all invalid ASCII control characters from article descriptions, as they cause XML parsing to fail"

    - title: "MOBI Output: Fix bug that was discarding non-breaking spaces at the start of a paragraph when they were followed immediately by a tag."
      tickets: [4887]

    - title: "LIT Input: Fix a regression in handling LIT files that contain txt rather than html data"
      tickets: [8904]

    - title: "Fix bug in the search box in the plugins dialog"
      tickets: [8882]

    - title: "Fix renaming of categories via the Tag Browser"
      tickets: [8807]

    - title: "Content server: Do not send the mobile version to the iPad"
      tickets: [8820]

    - title: "Fix undefined publication date appearing in the book jacket as 101"
      tickets: [8799]

    - title: "Heuristics: Fix issue with invalid markup from italicize patterns."

    - title: "TXT Input: De-hyphenate textile and markdown input as well. Fix inline TOC not showing all items."

    - title: "RTF Input: More encoding token splitting fixes."

    - title: "Fix regression that broke the convenience Email to xxx entry in the connect/share menu."
      tickets: [8775]

    - title: "Fix editing of series type custom columns in the book list."
      tickets: [8765]

  improved recipes:
    - El periodico de Aragon
    - B92
    - French Belgian news sources

  new recipes:
    - title: "ABC.es"
      author: "Ricardo Jurado"

    - title: "Korespondent and Kopalnia Wiedzy"
      author: "Attis"

    - title: "Radio Prague"
      author: "Francois Pellicaan"

    - title: "Europa Press"
      author: "Luis Hernandez"

    - title: "Interoperability Happens and njuz.net"
      author: "Darko Miletic"

    - title: "Weblogs SL"
      author: "desUBIKado"

    - title: "Kompas and Jakarta Post"
      author: "Adrian Gunawan"

- version: 0.7.44
  date: 2011-02-04

format_docs/pdb/apnx.txt Normal file
View File

@ -0,0 +1,69 @@
APNX
----
apnx files are used by the Amazon Kindle (firmware revision 3.1+) to
map pages from a print book to the Kindle version. Integers within
the file are big-endian.
Layout
------

bytes  content        comments

4      00010001       Format identifier. Value of 65537, big-endian
                      (0x00010001).
4      start of next  The offset after the ending location of the first
                      header.

Starts a new sequence of header info

4      length         Length of first header
N      first header   String containing the content header

Starts next sequence

2      unknown        Always 1
2      length         Length of second header
2      page count     Total number of bytes after the second header that
                      represent pages. This total includes bytes that
                      are ignored by the pageMap.
2      unknown        Always 32
N      second header  String containing the page mapping header
4*N    padding        The first number given in the page mapping header
                      indicates the number of 0 bytes.
4*N    page list

Content Header
--------------

The content header is a string enclosed in {} containing key, value pairs.

content         comments

contentGuid     Guid.
asin            Amazon identifier for the Kindle version of the book.
cdeType         MOBI cdeType. Should always be EBOK for ebooks.
fileRevisionId  Revision of this file.

Example:

{"contentGuid":"d8c14b0","asin":"B000JML5VM","cdeType":"EBOK","fileRevisionId":"1296874359405"}

Page Mapping Header
-------------------

The page mapping header is a string enclosed in {} containing key, value pairs.

content   comments

asin      The ISBN 10 for the paper book the pages correspond to
pageMap   Three value tuple. Looks like: "(N,N,N)"
          1) Number of bytes after the header that starts the page
             numbering sequence
          2) unknown
          3) unknown

Example:

{"asin":"1906694184","pageMap":"(4,a,1)"}
Page List
---------
The page list is a sequence of offsets in the uncompressed HTML. Each
value is the beginning of a new page. Each entry is a 4 byte big endian
int. The list is ordered lowest to highest.
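
To make the layout concrete, here is a minimal reader that follows the
tables above (an illustrative sketch, not part of calibre; it assumes a
well-formed file and no padding before the page list):

import struct

def read_apnx(path):
    # All integers in the file are big-endian.
    with open(path, 'rb') as f:
        data = f.read()
    ident, next_off, chlen = struct.unpack('>III', data[:12])
    if ident != 0x00010001:
        raise ValueError('bad format identifier')
    content_header = data[12:12 + chlen]  # first header, JSON-like string
    # Second sequence: unknown (always 1), header length, page count,
    # unknown (always 32), then the page mapping header itself.
    unknown1, phlen, page_count, unknown2 = struct.unpack(
        '>HHHH', data[next_off:next_off + 8])
    page_header = data[next_off + 8:next_off + 8 + phlen]
    # Page list: one 4 byte big-endian offset into the uncompressed
    # HTML per page, ordered lowest to highest.
    pages = []
    pos = next_off + 8 + phlen
    while pos + 4 <= len(data):
        pages.append(struct.unpack('>I', data[pos:pos + 4])[0])
        pos += 4
    return content_header, page_header, pages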

View File

@ -126,12 +126,14 @@ sort_columns_at_startup = None
gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'
-#: Control sorting of titles and series in the display
-# Control title and series sorting in the library view.
-# If set to 'library_order', Leading articles such as The and A will be ignored.
-# If set to 'strictly_alphabetic', the titles will be sorted without processing
-# For example, with library_order, The Client will sort under 'C'. With
-# strictly_alphabetic, the book will sort under 'T'.
+#: Control sorting of titles and series in the library display
+# Control title and series sorting in the library view. If set to
+# 'library_order', the title sort field will be used instead of the title.
+# Unless you have manually edited the title sort field, leading articles such as
+# The and A will be ignored. If set to 'strictly_alphabetic', the titles will be
+# sorted as-is (sort by title instead of title sort). For example, with
+# library_order, The Client will sort under 'C'. With strictly_alphabetic, the
+# book will sort under 'T'.
# This flag affects Calibre's library display. It has no effect on devices. In
# addition, titles for books added before changing the flag will retain their
# order until the title is edited. Double-clicking on a title and hitting return
@ -140,11 +142,15 @@ title_series_sorting = 'library_order'
#: Control formatting of title and series when used in templates
# Control how title and series names are formatted when saving to disk/sending
-# to device. If set to library_order, leading articles such as The and A will
-# be put at the end
-# If set to 'strictly_alphabetic', the titles will be sorted without processing
-# For example, with library_order, "The Client" will become "Client, The". With
-# strictly_alphabetic, it would remain "The Client".
+# to device. The behavior depends on the field being processed. If processing
+# title, then if this tweak is set to 'library_order', the title will be
+# replaced with title_sort. If it is set to 'strictly_alphabetic', then the
+# title will not be changed. If processing series, then if set to
+# 'library_order', articles such as 'The' and 'An' will be moved to the end. If
+# set to 'strictly_alphabetic', the series will be sent without change.
+# For example, if the tweak is set to library_order, "The Lord of the Rings"
+# will become "Lord of the Rings, The". If the tweak is set to
+# strictly_alphabetic, it would remain "The Lord of the Rings".
save_template_title_series_sorting = 'library_order'
#: Set the list of words considered to be "articles" for sort strings

View File

@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__author__ = 'Ricardo Jurado'
__copyright__ = 'Ricardo Jurado'
__version__ = 'v0.4'
__date__ = '11 February 2011'
'''
http://www.abc.es/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1296604369(BasicNewsRecipe):
title = u'ABC.es'
masthead_url = 'http://www.abc.es/img/logo-abc.gif'
cover_url = 'http://www.abc.es/img/logo-abc.gif'
publisher = u'Grupo VOCENTO'
__author__ = 'Ricardo Jurado'
description = 'Noticias de Spain y el mundo'
category = 'News,Spain,National,International,Economy'
oldest_article = 2
max_articles_per_feed = 10
no_stylesheets = True
use_embedded_content = False
encoding = 'ISO-8859-1'
remove_javascript = True
language = 'es'
extra_css = """
p{text-align: justify; font-size: 100%}
body{ text-align: left; font-size:100% }
h3{font-family: sans-serif; font-size:120%; font-weight:bold; text-align: justify; }
h2{font-family: sans-serif; font-size:100%; font-weight:bold; text-align: justify; }
h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
"""
keep_only_tags = [
# dict(name='h2', attrs={'class':['logos']}),
dict(name='h3', attrs={'class':['overhead']}),
dict(name='h1', attrs={'class':'headline'}),
dict(name='h3', attrs={'class':['subhead']}),
dict(name='div', attrs={'class':'datosi'}),
dict(name='div', attrs={'class':'photo-alt1'}),
dict(name='div', attrs={'class':'text'})
]
# remove_tags_before = dict(name='div' , attrs={'id':['cabecera2']})
feeds = [
(u'PORTADA', u'http://www.abc.es/rss/feeds/abcPortada.xml')
,(u'ULTIMAS', u'http://www.abc.es/rss/feeds/abc_ultima.xml')
,(u'NACIONAL', u'http://www.abc.es/rss/feeds/abc_EspanaEspana.xml')
,(u'INTERNACIONAL', u'http://www.abc.es/rss/feeds/abc_Internacional.xml')
,(u'OPINION', u'http://www.abc.es/rss/feeds/abc_opinioncompleto.xml')
,(u'BLOGS ABC', u'http://www.abc.es/rss/feeds/blogs-abc.xml')
,(u'ECONOMIA', u'http://www.abc.es/rss/feeds/abc_Economia.xml')
,(u'CIENCIA Y TECNOLOGIA', u'http://www.abc.es/rss/feeds/abc_Ciencia_Tecnologia.xml')
,(u'CULTURA', u'http://www.abc.es/rss/feeds/abc_Cultura.xml')
,(u'LIBROS', u'http://www.abc.es/rss/feeds/abc_Libros.xml')
,(u'MEDIOS Y REDES', u'http://www.abc.es/rss/feeds/ABC_Medios_Redes.xml')
,(u'EVASION', u'http://www.abc.es/rss/feeds/abc_evasion.xml')
,(u'ESPECTACULOS', u'http://www.abc.es/rss/feeds/abc_Espectaculos.xml')
,(u'GENTE', u'http://www.abc.es/rss/feeds/abc_Gente.xml')
,(u'DEPORTES', u'http://www.abc.es/rss/feeds/abc_Deportes.xml')
]

View File

@ -5,8 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon'
-__version__ = 'v0.05'
-__date__ = '07, December 2010'
+__version__ = 'v0.07'
+__date__ = '06, February 2011'
'''
elperiodicodearagon.com
'''
@ -38,7 +38,8 @@ class elperiodicodearagon(BasicNewsRecipe):
,'publisher' : publisher
}
-feeds = [(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
+feeds = [
+(u'Arag\xf3n', u'http://elperiodicodearagon.com/RSS/2.xml'),
(u'Internacional', u'http://elperiodicodearagon.com/RSS/4.xml'),
(u'Espa\xf1a', u'http://elperiodicodearagon.com/RSS/3.xml'),
(u'Econom\xeda', u'http://elperiodicodearagon.com/RSS/5.xml'),
@ -47,13 +48,16 @@ class elperiodicodearagon(BasicNewsRecipe):
(u'Opini\xf3n', u'http://elperiodicodearagon.com/RSS/103.xml'),
(u'Escenarios', u'http://elperiodicodearagon.com/RSS/105.xml'),
(u'Sociedad', u'http://elperiodicodearagon.com/RSS/104.xml'),
-(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')]
+(u'Gente', u'http://elperiodicodearagon.com/RSS/330.xml')
+]
extra_css = '''
-h3{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
-h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-dd{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+h3 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
+h2 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:18px;}
+h4 {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:20px;}
+.columnaDeRecursosRelacionados {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
+img{margin-bottom: 0.4em}
'''
remove_attributes = ['height','width']
@ -82,6 +86,7 @@ class elperiodicodearagon(BasicNewsRecipe):
dict(name='a', attrs={'class':'AvisoComentario'}),
dict(name='div', attrs={'class':'CajaAvisoComentario'}),
dict(name='div', attrs={'class':'navegaNoticias'}),
+dict(name='div', attrs={'class':'Mensaje'}),
dict(name='div', attrs={'id':'PaginadorDiCom'}),
dict(name='div', attrs={'id':'CajaAccesoCuentaUsuario'}),
dict(name='div', attrs={'id':'CintilloComentario'}),
@ -107,3 +112,15 @@ class elperiodicodearagon(BasicNewsRecipe):
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'<p id="">', re.DOTALL|re.IGNORECASE), lambda match: '<p>')
]
+# Replace the embedded YouTube video with a preview image
+def preprocess_html(self, soup):
+for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
+if video_yt:
+video_yt.name = 'img'
+fuente = video_yt['src']
+fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
+video_yt['src'] = fuente2 + '/0.jpg'
+return soup

View File

@ -182,6 +182,10 @@ class NYTimes(BasicNewsRecipe):
'mediaOverlay slideshow',
'headlinesOnly multiline flush',
'wideThumb',
+'video', #added 02-11-2011
+'videoHeader', #added 02-11-2011
+'articleInlineVideoHolder', #added 02-11-2011
+'assetCompanionAd',
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
@ -664,7 +668,7 @@ class NYTimes(BasicNewsRecipe):
try:
#remove "Related content" bar
-runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
+runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ']})
if runAroundsFound:
for runAround in runAroundsFound:
#find all section headers
@ -672,6 +676,12 @@ class NYTimes(BasicNewsRecipe):
if hlines:
for hline in hlines:
hline.extract()
+#find all section headers
+hlines = runAround.findAll('h6')
+if hlines:
+for hline in hlines:
+hline.extract()
except:
self.log("Error removing related content bar")

View File

@ -0,0 +1,33 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
blogs.tedneward.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class InteroperabilityHappens(BasicNewsRecipe):
title = 'Interoperability Happens'
__author__ = 'Darko Miletic'
description = 'Tech blog by Ted Neward'
oldest_article = 15
max_articles_per_feed = 100
language = 'en'
encoding = 'utf-8'
no_stylesheets = True
use_embedded_content = True
publication_type = 'blog'
extra_css = """
body{font-family: Verdana,Arial,Helvetica,sans-serif}
"""
conversion_options = {
'comment' : description
, 'tags' : 'blog, technology, microsoft, programming, C#, Java'
, 'publisher': 'Ted Neward'
, 'language' : language
}
feeds = [(u'Posts', u'http://blogs.tedneward.com/SyndicationService.asmx/GetRss')]

View File

@ -0,0 +1,104 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.05'
__date__ = '9, February 2011'
'''
http://www.weblogssl.com/
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class weblogssl(BasicNewsRecipe):
__author__ = 'desUBIKado'
description = u'Weblogs colectivos dedicados a seguir la actualidad sobre tecnologia, entretenimiento, estilos de vida, motor, deportes y economia.'
title = u'Weblogs SL (Xataka, Genbeta, VidaExtra, Blog de Cine y otros)'
publisher = 'Weblogs SL'
category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1.5
max_articles_per_feed = 100
encoding = 'utf-8'
use_embedded_content = False
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
# If you do not want to fetch every blog, you can skip any of them by putting a # character
# in front of its entry; that is, # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
# would keep Applesfera from being downloaded. NOTE: The last feed must not end with a comma
feeds = [
(u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
(u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
(u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
(u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
(u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
(u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
(u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
(u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
(u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
(u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
(u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
(u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
(u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
(u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
(u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
(u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
(u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
(u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
(u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
(u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
(u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
(u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
(u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
(u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
(u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
(u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
]
keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
dict(name='div', attrs={'class':'post'}),
dict(name='div', attrs={'id':'blog-comments'})
]
remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]
def print_version(self, url):
return url.replace('http://www.', 'http://m.')
preprocess_regexps = [
# Insert a blank line between one comment and the next
(re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
]
# Replace the embedded YouTube video with a preview image
def preprocess_html(self, soup):
for video_yt in soup.findAll('iframe',{'title':'YouTube video player'}):
if video_yt:
video_yt.name = 'img'
fuente = video_yt['src']
fuente2 = fuente.replace('http://www.youtube.com/embed/','http://img.youtube.com/vi/')
fuente3 = fuente2.replace('?rel=0','')
video_yt['src'] = fuente3 + '/0.jpg'
return soup

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
-__version__ = '0.7.44'
+__version__ = '0.7.45'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -507,7 +507,7 @@ from calibre.devices.kobo.driver import KOBO
from calibre.devices.bambook.driver import BAMBOOK
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-LibraryThing
+KentDistrictLibrary
from calibre.ebooks.metadata.douban import DoubanBooks
from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
@ -517,7 +517,7 @@ from calibre.ebooks.epub.fix.unmanifested import Unmanifested
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
plugins = [HTML2ZIP, PML2PMLZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-LibraryThing, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
Epubcheck, OpenLibraryCovers, LibraryThingCovers, DoubanCovers,
NiceBooksCovers]
plugins += [

View File

@ -83,7 +83,7 @@ class ANDROID(USBMS):
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
-'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE']
+'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT']

View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john at nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Generates and writes an APNX page mapping file.
'''
import struct
import uuid
from calibre.ebooks.pdb.header import PdbHeaderReader
class APNXBuilder(object):
'''
Currently uses the Adobe 1024 byte count equal one page formula.
'''
def write_apnx(self, mobi_file_path, apnx_path):
with open(mobi_file_path, 'rb') as mf:
phead = PdbHeaderReader(mf)
r0 = phead.section_data(0)
text_length = struct.unpack('>I', r0[4:8])[0]
pages = self.get_pages(text_length)
apnx = self.generate_apnx(pages)
with open(apnx_path, 'wb') as apnxf:
apnxf.write(apnx)
def generate_apnx(self, pages):
apnx = ''
content_vals = {
'guid': str(uuid.uuid4()).replace('-', '')[:8],
'isbn': '',
}
content_header = '{"contentGuid":"%(guid)s","asin":"%(isbn)s","cdeType":"EBOK","fileRevisionId":"1"}' % content_vals
page_header = '{"asin":"%(isbn)s","pageMap":"(1,a,1)"}' % content_vals
apnx += struct.pack('>I', 65537)
apnx += struct.pack('>I', 12 + len(content_header))
apnx += struct.pack('>I', len(content_header))
apnx += content_header
apnx += struct.pack('>H', 1)
apnx += struct.pack('>H', len(page_header))
apnx += struct.pack('>H', len(pages))
apnx += struct.pack('>H', 32)
apnx += page_header
# write page values to apnx
for page in pages:
apnx += struct.pack('>L', page)
return apnx
def get_pages(self, text_length):
pages = []
count = 0
while count < text_length:
pages.append(count)
count += 1024
return pages
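
A possible invocation (paths are hypothetical; write_apnx only reads
record 0 of the MOBI to obtain the text length, it does not modify the
book):

builder = APNXBuilder()
builder.write_apnx('Book.mobi', 'Book.apnx')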

View File

@ -0,0 +1,315 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
import os
from cStringIO import StringIO
from struct import unpack
class Bookmark(): # {{{
'''
A simple class fetching bookmark data
Kindle-specific
'''
def __init__(self, path, id, book_format, bookmark_extension):
self.book_format = book_format
self.bookmark_extension = bookmark_extension
self.book_length = 0
self.id = id
self.last_read = 0
self.last_read_location = 0
self.path = path
self.timestamp = 0
self.user_notes = None
self.get_bookmark_data()
self.get_book_length()
try:
self.percent_read = min(float(100*self.last_read / self.book_length),100)
except:
self.percent_read = 0
def record(self, n):
from calibre.ebooks.metadata.mobi import StreamSlicer
if n >= self.nrecs:
raise ValueError('non-existent record %r' % n)
offoff = 78 + (8 * n)
start, = unpack('>I', self.data[offoff + 0:offoff + 4])
stop = None
if n < (self.nrecs - 1):
stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
return StreamSlicer(self.stream, start, stop)
def get_bookmark_data(self):
''' Return the timestamp and last_read_location '''
from calibre.ebooks.metadata.mobi import StreamSlicer
user_notes = {}
if self.bookmark_extension == 'mbp':
MAGIC_MOBI_CONSTANT = 150
with open(self.path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.timestamp, = unpack('>I', data[0x24:0x28])
bpar_offset, = unpack('>I', data[0x4e:0x52])
lrlo = bpar_offset + 0x0c
self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
entries, = unpack('>I', data[0x4a:0x4e])
# Store the annotations/locations
bpl = bpar_offset + 4
bpar_len, = unpack('>I', data[bpl:bpl+4])
bpar_len += 8
#print "bpar_len: 0x%x" % bpar_len
eo = bpar_offset + bpar_len
# Walk bookmark entries
#print " --- %s --- " % self.path
current_entry = 1
sig = data[eo:eo+4]
previous_block = None
while sig == 'DATA':
text = None
entry_type = None
rec_len, = unpack('>I', data[eo+4:eo+8])
if rec_len == 0:
current_block = "empty_data"
elif data[eo+8:eo+12] == "EBAR":
current_block = "data_header"
#entry_type = "data_header"
location, = unpack('>I', data[eo+0x34:eo+0x38])
#print "data_header location: %d" % location
else:
current_block = "text_block"
if previous_block == 'empty_data':
entry_type = 'Note'
elif previous_block == 'data_header':
entry_type = 'Highlight'
text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
if entry_type:
displayed_location = location/MAGIC_MOBI_CONSTANT + 1
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=entry_type,
text=text)
eo += rec_len + 8
current_entry += 1
previous_block = current_block
sig = data[eo:eo+4]
while sig == 'BKMK':
# Fix start location for Highlights using BKMK data
end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
if end_loc in user_notes and \
(user_notes[end_loc]['type'] == 'Highlight' or \
user_notes[end_loc]['type'] == 'Note'):
# Switch location to start (0x08:0x0c)
start, = unpack('>I', data[eo+8:eo+12])
user_notes[start] = user_notes[end_loc]
'''
print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'],
end_loc,
end_loc/MAGIC_MOBI_CONSTANT + 1,
start,
start//MAGIC_MOBI_CONSTANT + 1)
'''
user_notes[start]['displayed_location'] = start/MAGIC_MOBI_CONSTANT + 1
user_notes.pop(end_loc)
else:
# If a bookmark coincides with a user annotation, the locs could
# be the same - cheat by nudging -1
# Skip bookmark for last_read_location
if end_loc != self.last_read:
# print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1)
displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
user_notes[end_loc - 1] = dict(id=self.id,
displayed_location=displayed_location,
type='Bookmark',
text=None)
rec_len, = unpack('>I', data[eo+4:eo+8])
eo += rec_len + 8
sig = data[eo:eo+4]
elif self.bookmark_extension == 'tan':
from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata
def get_topaz_highlight(displayed_location):
# Parse My Clippings.txt for a matching highlight
# Search looks for book title match, highlight match, and location match
# Author is not matched
# This will find the first instance of a clipping only
book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
with open(book_fs,'rb') as f2:
stream = StringIO(f2.read())
mi = get_topaz_metadata(stream)
my_clippings = self.path
split = my_clippings.find('documents') + len('documents/')
my_clippings = my_clippings[:split] + "My Clippings.txt"
try:
with open(my_clippings, 'r') as f2:
marker_found = 0
text = ''
search_str1 = '%s' % (mi.title)
search_str2 = '- Highlight Loc. %d' % (displayed_location)
for line in f2:
if marker_found == 0:
if line.startswith(search_str1):
marker_found = 1
elif marker_found == 1:
if line.startswith(search_str2):
marker_found = 2
elif marker_found == 2:
if line.startswith('=========='):
break
text += line.strip()
else:
raise Exception('error')
except:
text = '(Unable to extract highlight text from My Clippings.txt)'
return text
MAGIC_TOPAZ_CONSTANT = 33.33
self.timestamp = os.path.getmtime(self.path)
with open(self.path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.last_read = int(unpack('>I', data[5:9])[0])
self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
entries, = unpack('>I', data[9:13])
current_entry = 0
e_base = 0x0d
while current_entry < entries:
location, = unpack('>I', data[e_base+2:e_base+6])
text = None
text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
e_type, = unpack('>B', data[e_base+1])
if e_type == 0:
e_type = 'Bookmark'
elif e_type == 1:
e_type = 'Highlight'
text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
elif e_type == 2:
e_type = 'Note'
text = data[e_base+0x10:e_base+0x10+text_len]
else:
e_type = 'Unknown annotation type'
displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=e_type,
text=text)
if text_len == 0xFFFFFFFF:
e_base = e_base + 14
else:
e_base = e_base + 14 + 2 + text_len
current_entry += 1
for location in user_notes:
if location == self.last_read:
user_notes.pop(location)
break
elif self.bookmark_extension == 'pdr':
self.timestamp = os.path.getmtime(self.path)
with open(self.path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.last_read = int(unpack('>I', data[5:9])[0])
entries, = unpack('>I', data[9:13])
current_entry = 0
e_base = 0x0d
self.pdf_page_offset = 0
while current_entry < entries:
'''
location, = unpack('>I', data[e_base+2:e_base+6])
text = None
text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
e_type, = unpack('>B', data[e_base+1])
if e_type == 0:
e_type = 'Bookmark'
elif e_type == 1:
e_type = 'Highlight'
text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
elif e_type == 2:
e_type = 'Note'
text = data[e_base+0x10:e_base+0x10+text_len]
else:
e_type = 'Unknown annotation type'
if self.book_format in ['tpz','azw1']:
displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
elif self.book_format == 'pdf':
# *** This needs implementation
displayed_location = location
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=e_type,
text=text)
if text_len == 0xFFFFFFFF:
e_base = e_base + 14
else:
e_base = e_base + 14 + 2 + text_len
current_entry += 1
'''
# Use label as page number
pdf_location, = unpack('>I', data[e_base+1:e_base+5])
label_len, = unpack('>H', data[e_base+5:e_base+7])
location = int(data[e_base+7:e_base+7+label_len])
displayed_location = location
e_type = 'Bookmark'
text = None
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=e_type,
text=text)
self.pdf_page_offset = pdf_location - location
e_base += (7 + label_len)
current_entry += 1
self.last_read_location = self.last_read - self.pdf_page_offset
else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension
self.user_notes = user_notes
def get_book_length(self):
from calibre.ebooks.metadata.mobi import StreamSlicer
book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
self.book_length = 0
if self.bookmark_extension == 'mbp':
# Read the book len from the header
try:
with open(book_fs,'rb') as f:
self.stream = StringIO(f.read())
self.data = StreamSlicer(self.stream)
self.nrecs, = unpack('>H', self.data[76:78])
record0 = self.record(0)
self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
except:
pass
elif self.bookmark_extension == 'tan':
# Read bookLength from metadata
from calibre.ebooks.metadata.topaz import MetadataUpdater
try:
with open(book_fs,'rb') as f:
mu = MetadataUpdater(f)
self.book_length = mu.book_length
except:
pass
elif self.bookmark_extension == 'pdr':
from calibre import plugins
try:
self.book_length = plugins['pdfreflow'][0].get_numpages(open(book_fs).read())
except:
pass
else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension
# }}}
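
A rough usage sketch (the path and id are hypothetical; in practice the
Kindle driver builds these arguments from the sidecar files it finds
next to each book on the device):

bm = Bookmark('/media/Kindle/documents/book.mbp', 'book-1', 'mobi', 'mbp')
print bm.percent_read, bm.last_read_location
for loc, note in sorted(bm.user_notes.items()):
    print loc, note['type'], note['text']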

View File

@ -7,10 +7,11 @@ __docformat__ = 'restructuredtext en'
'''
Device driver for Amazon's Kindle
'''
-import datetime, os, re, sys, json, hashlib
-from cStringIO import StringIO
-from struct import unpack
+import datetime, os, re, sys, json, hashlib
+from calibre.devices.kindle.apnx import APNXBuilder
+from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS
'''
@ -170,6 +171,8 @@ class KINDLE2(KINDLE):
description = _('Communicate with the Kindle 2/3 eBook reader.')
FORMATS = KINDLE.FORMATS + ['pdf']
+DELETE_EXTS = KINDLE.DELETE_EXTS + ['.apnx']
PRODUCT_ID = [0x0002, 0x0004]
BCD = [0x0100]
@ -205,6 +208,23 @@ class KINDLE2(KINDLE):
if h in path_map:
book.device_collections = list(sorted(path_map[h]))
+def upload_cover(self, path, filename, metadata, filepath):
+'''
+Hijacking this function to write the apnx file.
+'''
+if not filepath.lower().endswith('.mobi'):
+return
+apnx_path = '%s.apnx' % os.path.join(path, filename)
+apnx_builder = APNXBuilder()
+try:
+apnx_builder.write_apnx(filepath, apnx_path)
+except:
+print 'Failed to generate APNX'
+import traceback
+traceback.print_exc()
class KINDLE_DX(KINDLE2):
name = 'Kindle DX Device Interface'
@ -214,310 +234,3 @@ class KINDLE_DX(KINDLE2):
PRODUCT_ID = [0x0003]
BCD = [0x0100]
class Bookmark(): # {{{
'''
A simple class fetching bookmark data
Kindle-specific
'''
def __init__(self, path, id, book_format, bookmark_extension):
self.book_format = book_format
self.bookmark_extension = bookmark_extension
self.book_length = 0
self.id = id
self.last_read = 0
self.last_read_location = 0
self.path = path
self.timestamp = 0
self.user_notes = None
self.get_bookmark_data()
self.get_book_length()
try:
self.percent_read = min(float(100*self.last_read / self.book_length),100)
except:
self.percent_read = 0
def record(self, n):
from calibre.ebooks.metadata.mobi import StreamSlicer
if n >= self.nrecs:
raise ValueError('non-existent record %r' % n)
offoff = 78 + (8 * n)
start, = unpack('>I', self.data[offoff + 0:offoff + 4])
stop = None
if n < (self.nrecs - 1):
stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
return StreamSlicer(self.stream, start, stop)
def get_bookmark_data(self):
''' Return the timestamp and last_read_location '''
from calibre.ebooks.metadata.mobi import StreamSlicer
user_notes = {}
if self.bookmark_extension == 'mbp':
MAGIC_MOBI_CONSTANT = 150
with open(self.path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.timestamp, = unpack('>I', data[0x24:0x28])
bpar_offset, = unpack('>I', data[0x4e:0x52])
lrlo = bpar_offset + 0x0c
self.last_read = int(unpack('>I', data[lrlo:lrlo+4])[0])
self.last_read_location = self.last_read/MAGIC_MOBI_CONSTANT + 1
entries, = unpack('>I', data[0x4a:0x4e])
# Store the annotations/locations
bpl = bpar_offset + 4
bpar_len, = unpack('>I', data[bpl:bpl+4])
bpar_len += 8
#print "bpar_len: 0x%x" % bpar_len
eo = bpar_offset + bpar_len
# Walk bookmark entries
#print " --- %s --- " % self.path
current_entry = 1
sig = data[eo:eo+4]
previous_block = None
while sig == 'DATA':
text = None
entry_type = None
rec_len, = unpack('>I', data[eo+4:eo+8])
if rec_len == 0:
current_block = "empty_data"
elif data[eo+8:eo+12] == "EBAR":
current_block = "data_header"
#entry_type = "data_header"
location, = unpack('>I', data[eo+0x34:eo+0x38])
#print "data_header location: %d" % location
else:
current_block = "text_block"
if previous_block == 'empty_data':
entry_type = 'Note'
elif previous_block == 'data_header':
entry_type = 'Highlight'
text = data[eo+8:eo+8+rec_len].decode('utf-16-be')
if entry_type:
displayed_location = location/MAGIC_MOBI_CONSTANT + 1
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=entry_type,
text=text)
eo += rec_len + 8
current_entry += 1
previous_block = current_block
sig = data[eo:eo+4]
while sig == 'BKMK':
# Fix start location for Highlights using BKMK data
end_loc, = unpack('>I', data[eo+0x10:eo+0x14])
if end_loc in user_notes and \
(user_notes[end_loc]['type'] == 'Highlight' or \
user_notes[end_loc]['type'] == 'Note'):
# Switch location to start (0x08:0x0c)
start, = unpack('>I', data[eo+8:eo+12])
user_notes[start] = user_notes[end_loc]
'''
print " %s: swapping 0x%x (%d) to 0x%x (%d)" % (user_notes[end_loc]['type'],
end_loc,
end_loc/MAGIC_MOBI_CONSTANT + 1,
start,
start//MAGIC_MOBI_CONSTANT + 1)
'''
user_notes[start]['displayed_location'] = start/MAGIC_MOBI_CONSTANT + 1
user_notes.pop(end_loc)
else:
# If a bookmark coincides with a user annotation, the locs could
# be the same - cheat by nudging -1
# Skip bookmark for last_read_location
if end_loc != self.last_read:
# print " adding Bookmark at 0x%x (%d)" % (end_loc, end_loc/MAGIC_MOBI_CONSTANT + 1)
displayed_location = end_loc/MAGIC_MOBI_CONSTANT + 1
user_notes[end_loc - 1] = dict(id=self.id,
displayed_location=displayed_location,
type='Bookmark',
text=None)
rec_len, = unpack('>I', data[eo+4:eo+8])
eo += rec_len + 8
sig = data[eo:eo+4]
elif self.bookmark_extension == 'tan':
from calibre.ebooks.metadata.topaz import get_metadata as get_topaz_metadata
def get_topaz_highlight(displayed_location):
# Parse My Clippings.txt for a matching highlight
# Search looks for book title match, highlight match, and location match
# Author is not matched
# This will find the first instance of a clipping only
book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
with open(book_fs,'rb') as f2:
stream = StringIO(f2.read())
mi = get_topaz_metadata(stream)
my_clippings = self.path
split = my_clippings.find('documents') + len('documents/')
my_clippings = my_clippings[:split] + "My Clippings.txt"
try:
with open(my_clippings, 'r') as f2:
marker_found = 0
text = ''
search_str1 = '%s' % (mi.title)
search_str2 = '- Highlight Loc. %d' % (displayed_location)
for line in f2:
if marker_found == 0:
if line.startswith(search_str1):
marker_found = 1
elif marker_found == 1:
if line.startswith(search_str2):
marker_found = 2
elif marker_found == 2:
if line.startswith('=========='):
break
text += line.strip()
else:
raise Exception('error')
except:
text = '(Unable to extract highlight text from My Clippings.txt)'
return text
MAGIC_TOPAZ_CONSTANT = 33.33
self.timestamp = os.path.getmtime(self.path)
with open(self.path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.last_read = int(unpack('>I', data[5:9])[0])
self.last_read_location = self.last_read/MAGIC_TOPAZ_CONSTANT + 1
entries, = unpack('>I', data[9:13])
current_entry = 0
e_base = 0x0d
while current_entry < entries:
location, = unpack('>I', data[e_base+2:e_base+6])
text = None
text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
e_type, = unpack('>B', data[e_base+1])
if e_type == 0:
e_type = 'Bookmark'
elif e_type == 1:
e_type = 'Highlight'
text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
elif e_type == 2:
e_type = 'Note'
text = data[e_base+0x10:e_base+0x10+text_len]
else:
e_type = 'Unknown annotation type'
displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=e_type,
text=text)
if text_len == 0xFFFFFFFF:
e_base = e_base + 14
else:
e_base = e_base + 14 + 2 + text_len
current_entry += 1
for location in user_notes:
if location == self.last_read:
user_notes.pop(location)
break
elif self.bookmark_extension == 'pdr':
self.timestamp = os.path.getmtime(self.path)
with open(self.path,'rb') as f:
stream = StringIO(f.read())
data = StreamSlicer(stream)
self.last_read = int(unpack('>I', data[5:9])[0])
entries, = unpack('>I', data[9:13])
current_entry = 0
e_base = 0x0d
self.pdf_page_offset = 0
while current_entry < entries:
'''
location, = unpack('>I', data[e_base+2:e_base+6])
text = None
text_len, = unpack('>I', data[e_base+0xA:e_base+0xE])
e_type, = unpack('>B', data[e_base+1])
if e_type == 0:
e_type = 'Bookmark'
elif e_type == 1:
e_type = 'Highlight'
text = get_topaz_highlight(location/MAGIC_TOPAZ_CONSTANT + 1)
elif e_type == 2:
e_type = 'Note'
text = data[e_base+0x10:e_base+0x10+text_len]
else:
e_type = 'Unknown annotation type'
if self.book_format in ['tpz','azw1']:
displayed_location = location/MAGIC_TOPAZ_CONSTANT + 1
elif self.book_format == 'pdf':
# *** This needs implementation
displayed_location = location
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=e_type,
text=text)
if text_len == 0xFFFFFFFF:
e_base = e_base + 14
else:
e_base = e_base + 14 + 2 + text_len
current_entry += 1
'''
# Use label as page number
pdf_location, = unpack('>I', data[e_base+1:e_base+5])
label_len, = unpack('>H', data[e_base+5:e_base+7])
location = int(data[e_base+7:e_base+7+label_len])
displayed_location = location
e_type = 'Bookmark'
text = None
user_notes[location] = dict(id=self.id,
displayed_location=displayed_location,
type=e_type,
text=text)
self.pdf_page_offset = pdf_location - location
e_base += (7 + label_len)
current_entry += 1
self.last_read_location = self.last_read - self.pdf_page_offset
else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension
self.user_notes = user_notes
def get_book_length(self):
from calibre.ebooks.metadata.mobi import StreamSlicer
book_fs = self.path.replace('.%s' % self.bookmark_extension,'.%s' % self.book_format)
self.book_length = 0
if self.bookmark_extension == 'mbp':
# Read the book len from the header
try:
with open(book_fs,'rb') as f:
self.stream = StringIO(f.read())
self.data = StreamSlicer(self.stream)
self.nrecs, = unpack('>H', self.data[76:78])
record0 = self.record(0)
self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
except:
pass
elif self.bookmark_extension == 'tan':
# Read bookLength from metadata
from calibre.ebooks.metadata.topaz import MetadataUpdater
try:
with open(book_fs,'rb') as f:
mu = MetadataUpdater(f)
self.book_length = mu.book_length
except:
pass
elif self.bookmark_extension == 'pdr':
from calibre import plugins
try:
self.book_length = plugins['pdfreflow'][0].get_numpages(open(book_fs).read())
except:
pass
else:
print "unsupported bookmark_extension: %s" % self.bookmark_extension
# }}}

View File

@ -76,11 +76,11 @@ class E52(USBMS):
supported_platforms = ['windows', 'linux', 'osx']
VENDOR_ID = [0x421]
-PRODUCT_ID = [0x1CD]
+PRODUCT_ID = [0x1CD, 0x273]
BCD = [0x100]
-FORMATS = ['mobi', 'prc']
+FORMATS = ['epub', 'fb2', 'mobi', 'prc', 'txt']
EBOOK_DIR_MAIN = 'eBooks'
SUPPORTS_SUB_DIRS = True

View File

@ -216,8 +216,8 @@ class EPUBOutput(OutputFormatPlugin):
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
from calibre.ebooks.epub import initialize_container
-epub = initialize_container(output_path, os.path.basename(opf),
-extra_entries=extra_entries)
+with initialize_container(output_path, os.path.basename(opf),
+extra_entries=extra_entries) as epub:
epub.add_dir(tdir)
if encryption is not None:
epub.writestr('META-INF/encryption.xml', encryption)
@ -225,12 +225,13 @@ class EPUBOutput(OutputFormatPlugin):
epub.writestr('META-INF/metadata.xml',
metadata_xml.encode('utf-8'))
if opts.extract_to is not None:
+from calibre.utils.zipfile import ZipFile
if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to)
os.mkdir(opts.extract_to)
-epub.extractall(path=opts.extract_to)
+with ZipFile(output_path) as zf:
+zf.extractall(path=opts.extract_to)
self.log.info('EPUB extracted to', opts.extract_to)
-epub.close()
def encrypt_fonts(self, uris, tdir, uuid): # {{{
from binascii import unhexlify

View File

@ -247,30 +247,24 @@ class Amazon(MetadataSource): # {{{
# }}}
-class LibraryThing(MetadataSource): # {{{
+class KentDistrictLibrary(MetadataSource): # {{{
-name = 'LibraryThing'
+name = 'Kent District Library'
metadata_type = 'social'
-description = _('Downloads series/covers/rating information from librarything.com')
+description = _('Downloads series information from ww2.kdl.org')
def fetch(self):
-if not self.isbn or not self.site_customization:
+if not self.title or not self.book_author:
return
-from calibre.ebooks.metadata.library_thing import get_social_metadata
-un, _, pw = self.site_customization.partition(':')
+from calibre.ebooks.metadata.kdl import get_series
try:
-self.results = get_social_metadata(self.title, self.book_author,
-self.publisher, self.isbn, username=un, password=pw)
+self.results = get_series(self.title, self.book_author)
except Exception, e:
+import traceback
+traceback.print_exc()
self.exception = e
self.tb = traceback.format_exc()
-@property
-def string_customization_help(self):
-ans = _('To use librarything.com you must sign up for a %sfree account%s '
-'and enter your username and password separated by a : below.')
-return '<p>'+ans%('<a href="http://www.librarything.com">', '</a>')
# }}}

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, urllib, urlparse
from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="
_ignore_starts = u'\'"'+u''.join(unichr(x) for x in range(0x2018, 0x201e)+[0x2032, 0x2033])
def get_series(title, authors):
mi = Metadata(title, authors)
if title and title[0] in _ignore_starts:
title = title[1:]
title = re.sub(r'^(A|The|An)\s+', '', title).strip()
if not title:
return mi
if isinstance(title, unicode):
title = title.encode('utf-8')
title = urllib.quote_plus(title)
author = authors[0].strip()
if not author:
return mi
if ',' in author:
author = author.split(',')[0]
else:
author = author.split()[-1]
url = URL.format(author, title)
br = browser()
raw = br.open(url).read()
if 'see the full results' not in raw:
return mi
raw = xml_to_unicode(raw)[0]
soup = BeautifulSoup(raw)
searcharea = soup.find('div', attrs={'class':'searcharea'})
if searcharea is None:
return mi
ss = searcharea.find('div', attrs={'class':'seriessearch'})
if ss is None:
return mi
a = ss.find('a', href=True)
if a is None:
return mi
href = a['href'].partition('?')[-1]
data = urlparse.parse_qs(href)
series = data.get('SeriesName', [])
if not series:
return mi
series = series[0]
series = re.sub(r' series$', '', series).strip()
if series:
mi.series = series
ns = ss.nextSibling
if ns.contents:
raw = unicode(ns.contents[0])
raw = raw.partition('.')[0].strip()
try:
mi.series_index = int(raw)
except:
pass
return mi
if __name__ == '__main__':
import sys
print get_series(sys.argv[-2], [sys.argv[-1]])

View File

@ -39,6 +39,13 @@ def asfloat(value):
return 0.0
return float(value)
+def isspace(text):
+if not text:
+return True
+if u'\xa0' in text:
+return False
+return text.isspace()
class BlockState(object):
def __init__(self, body):
self.body = body
@ -438,7 +445,7 @@ class MobiMLizer(object):
if elem.text:
if istate.preserve:
text = elem.text
-elif len(elem) > 0 and elem.text.isspace():
+elif len(elem) > 0 and isspace(elem.text):
text = None
else:
text = COLLAPSE.sub(' ', elem.text)
@ -481,7 +488,7 @@ class MobiMLizer(object):
if child.tail:
if istate.preserve:
tail = child.tail
-elif bstate.para is None and child.tail.isspace():
+elif bstate.para is None and isspace(child.tail):
tail = None
else:
tail = COLLAPSE.sub(' ', child.tail)

View File

@ -70,7 +70,7 @@ class PML_HTMLizer(object):
'c': ('<div style="text-align: center; margin: auto;">', '</div>'), 'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
'r': ('<div style="text-align: right;">', '</div>'), 'r': ('<div style="text-align: right;">', '</div>'),
't': ('<div style="margin-left: 5%;">', '</div>'), 't': ('<div style="margin-left: 5%;">', '</div>'),
'T': ('<div style="margin-left: %s;">', '</div>'), 'T': ('<div style="text-indent: %s;">', '</div>'),
         'i': ('<span style="font-style: italic;">', '</span>'),
         'u': ('<span style="text-decoration: underline;">', '</span>'),
         'd': ('<span style="text-decoration: line-through;">', '</span>'),
@@ -499,7 +499,13 @@ class PML_HTMLizer(object):
         self.toc = []
         self.file_name = file_name
-        indent_state = {'t': False, 'T': False}
+        # t: Are we in an open \t tag set?
+        # T: Are we in an open \T?
+        # st: Did the \t start the line?
+        # sT: Did the \T start the line?
+        # et: Did the \t end the line?
+        indent_state = {'t': False, 'T': False, 'st': False, 'sT': False, 'et': False}
+        basic_indent = False
         adv_indent_val = ''
         # Keep track of the number of empty lines
         # between paragraphs. When we reach a set number
@@ -512,8 +518,26 @@ class PML_HTMLizer(object):
         for line in pml.splitlines():
             parsed = []
             empty = True
             basic_indent = indent_state['t']
-            adv_indent = indent_state['T']
+            indent_state['T'] = False
+            # Determine if the \t starts the line or if we are
+            # in an open \t block.
+            if line.lstrip().startswith('\\t') or basic_indent:
+                basic_indent = True
+                indent_state['st'] = True
+            else:
+                indent_state['st'] = False
+            # Determine if the \T starts the line.
+            if line.lstrip().startswith('\\T'):
+                indent_state['sT'] = True
+            else:
+                indent_state['sT'] = False
+            # Determine if the \t ends the line.
+            if line.rstrip().endswith('\\t'):
+                indent_state['et'] = True
+            else:
+                indent_state['et'] = False

             # Must use StringIO, cStringIO does not support unicode
             line = StringIO.StringIO(line)
@@ -575,13 +599,10 @@ class PML_HTMLizer(object):
                         empty = False
                         text = '<hr width="%s" />' % self.code_value(line)
                     elif c == 't':
-                        indent_state[c] = not indent_state[c]
-                        if indent_state[c]:
-                            basic_indent = True
+                        indent_state['t'] = not indent_state['t']
                     elif c == 'T':
                         # Ensure we only store the value on the first T set for the line.
                         if not indent_state['T']:
-                            adv_indent = True
                             adv_indent_val = self.code_value(line)
                         else:
                             # We detected a T previously on this line.
@@ -610,10 +631,23 @@ class PML_HTMLizer(object):
                 text = self.end_line()
                 parsed.append(text)
+            # Basic indent will be set if the \t starts the line or
+            # if we are in a continuing \t block.
             if basic_indent:
-                parsed.insert(0, self.STATES_TAGS['t'][0])
-                parsed.append(self.STATES_TAGS['t'][1])
-            elif adv_indent:
+                # if the \t started the line and either it ended the line or the \t
+                # block is still open use a left margin.
+                if indent_state['st'] and (indent_state['et'] or indent_state['t']):
+                    parsed.insert(0, self.STATES_TAGS['t'][0])
+                    parsed.append(self.STATES_TAGS['t'][1])
+                # Use a text indent instead of a margin.
+                # This handles cases such as:
+                # \tO\tne upon a time...
+                else:
+                    parsed.insert(0, self.STATES_TAGS['T'][0] % '5%')
+                    parsed.append(self.STATES_TAGS['T'][1])
+            # \t will override \T's on the line.
+            # We only handle \T's that started the line.
+            elif indent_state['T'] and indent_state['sT']:
                 parsed.insert(0, self.STATES_TAGS['T'][0] % adv_indent_val)
                 parsed.append(self.STATES_TAGS['T'][1])
                 indent_state['T'] = False
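
To make the new rules concrete: a \t that both starts and ends a line (or whose block stays open) becomes a left-margin block, while a mid-line \t pair such as \tO\tne upon a time... becomes a text indent. A minimal standalone sketch of that decision, as an illustrative helper rather than calibre's actual parser (the function name and return values are invented here):

    def classify_t_indent(line, block_open):
        # Mirror the hunk above: decide between a margin and a text-indent
        # for one raw PML line. block_open tracks an unclosed \t block.
        starts = line.lstrip().startswith('\\t') or block_open
        ends = line.rstrip().endswith('\\t')
        for _ in range(line.count('\\t')):
            block_open = not block_open   # every \t toggles the open state
        if starts and (ends or block_open):
            return 'margin', block_open       # whole-line block: left margin
        if starts:
            return 'text-indent', block_open  # e.g. \tO\tne upon a time...
        return None, block_open

    # classify_t_indent('\\tOnce upon a time...\\t', False) -> ('margin', False)
    # classify_t_indent('\\tO\\tne upon a time...', False)  -> ('text-indent', False)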

View File

@@ -237,6 +237,7 @@ class ChooseLibraryAction(InterfaceAction):
             return
         self.stats.rename(location, newloc)
         self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def delete_requested(self, name, location):
         loc = location.replace('/', os.sep)
@@ -253,6 +254,7 @@ class ChooseLibraryAction(InterfaceAction):
             pass
         self.stats.remove(location)
         self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def backup_status(self, location):
         dirty_text = 'no'
@@ -329,6 +331,7 @@ class ChooseLibraryAction(InterfaceAction):
                     ' libraries.')%loc, show=True)
                 self.stats.remove(location)
                 self.build_menus()
+                self.gui.iactions['Copy To Library'].build_menus()
                 return
             prefs['library_path'] = loc
@@ -371,9 +374,20 @@ class ChooseLibraryAction(InterfaceAction):
         if not self.change_library_allowed():
             return
         from calibre.gui2.dialogs.choose_library import ChooseLibrary
+        self.gui.library_view.save_state()
         db = self.gui.library_view.model().db
-        c = ChooseLibrary(db, self.gui.library_moved, self.gui)
+        location = self.stats.canonicalize_path(db.library_path)
+        self.pre_choose_dialog_location = location
+        c = ChooseLibrary(db, self.choose_library_callback, self.gui)
         c.exec_()
+        self.choose_dialog_library_renamed = getattr(c, 'library_renamed', False)
+
+    def choose_library_callback(self, newloc, copy_structure=False):
+        self.gui.library_moved(newloc, copy_structure=copy_structure)
+        if getattr(self, 'choose_dialog_library_renamed', False):
+            self.stats.rename(self.pre_choose_dialog_location, prefs['library_path'])
+        self.build_menus()
+        self.gui.iactions['Copy To Library'].build_menus()

     def change_library_allowed(self):
         if os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH', None):

View File

@@ -71,6 +71,8 @@ class ChooseLibrary(QDialog, Ui_Dialog):
             prefs['library_path'] = loc
             self.callback(loc, copy_structure=self.copy_structure.isChecked())
         else:
+            self.db.prefs.disable_setting = True
+            self.library_renamed = True
             move_library(self.db.library_path, loc, self.parent(),
                     self.callback)

View File

@@ -60,7 +60,8 @@ class Tweak(object): # {{{
         return ans

     def __cmp__(self, other):
-        return cmp(self.is_customized, getattr(other, 'is_customized', False))
+        return -1 * cmp(self.is_customized,
+                        getattr(other, 'is_customized', False))

     @property
     def is_customized(self):
@@ -111,7 +112,10 @@ class Tweaks(QAbstractListModel): # {{{
         if role == Qt.ToolTipRole:
             tt = _('This tweak has it default value')
             if tweak.is_customized:
-                tt = _('This tweak has been customized')
+                tt = '<p>'+_('This tweak has been customized')
+                tt += '<pre>'
+                for varn, val in tweak.custom_values.iteritems():
+                    tt += '%s = %r\n\n'%(varn, val)
             return tt
         if role == Qt.UserRole:
             return tweak
@@ -136,6 +140,7 @@ class Tweaks(QAbstractListModel): # {{{
             pos = self.read_tweak(lines, pos, dl, l)
             pos += 1

+        self.tweaks.sort()
         default_keys = set(dl.iterkeys())
         custom_keys = set(l.iterkeys())
@@ -227,8 +232,12 @@ class PluginTweaks(QDialog): # {{{
         self.highlighter = PythonHighlighter(self.edit.document())
         self.l = QVBoxLayout()
         self.setLayout(self.l)
-        self.l.addWidget(QLabel(
-            _('Add/edit tweaks for any custom plugins you have installed.')))
+        self.msg = QLabel(
+            _('Add/edit tweaks for any custom plugins you have installed. '
+                'Documentation for these tweaks should be available '
+                'on the website from where you downloaded the plugins.'))
+        self.msg.setWordWrap(True)
+        self.l.addWidget(self.msg)
         self.l.addWidget(self.edit)
         self.edit.setPlainText(raw)
         self.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
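
The negated cmp above, combined with the new self.tweaks.sort() call, floats customized tweaks to the top of the list while leaving each group in its original order, since Python's sort is stable. A rough Python 3 equivalent (cmp and __cmp__ no longer exist there; plain dicts stand in for Tweak objects):

    tweaks = [
        {'name': 'plain_a', 'is_customized': False},
        {'name': 'custom_b', 'is_customized': True},
        {'name': 'plain_c', 'is_customized': False},
    ]
    # "not is_customized" maps customized entries to False, and False
    # sorts before True, so customized tweaks lead; stability preserves
    # the original order within each group.
    tweaks.sort(key=lambda t: not t['is_customized'])
    # -> custom_b, plain_a, plain_c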

View File

@@ -440,16 +440,17 @@ class Document(QWebPage): # {{{
     @property
     def height(self):
-        j = self.javascript('document.body.offsetHeight', 'int')
+        # Note that document.body.offsetHeight does not include top and bottom
+        # margins on body and in some cases does not include the top margin on
+        # the first element inside body either. See ticket #8791 for an example
+        # of the latter.
         q = self.mainFrame().contentsSize().height()
-        if q == j:
-            return j
-        if min(j, q) <= 0:
-            return max(j, q)
-        window_height = self.window_height
-        if j == window_height:
-            return j if q < 1.2*j else q
-        return j
+        if q < 0:
+            # Don't know if this is still needed, but it can't hurt
+            j = self.javascript('document.body.offsetHeight', 'int')
+            if j >= 0:
+                q = j
+        return q

     @property
     def width(self):
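
In other words, the viewer now trusts Qt's contentsSize(), which accounts for body margins, and only falls back to document.body.offsetHeight when Qt reports no height at all; the old heuristic of comparing the two values could pick the smaller offsetHeight and clip the bottom of a chapter whose first element had a large top margin (ticket #8791). A bare sketch of the fallback, with invented numbers for illustration:

    def effective_height(contents_height, offset_height):
        # Prefer the layout engine's value; it includes body margins.
        if contents_height >= 0:
            return contents_height
        # Fall back to the DOM query only when Qt has no answer.
        return offset_height if offset_height >= 0 else contents_height

    # A body with a 50px top margin might report contents_height=1050
    # but offset_height=1000; preferring the former avoids cutting off
    # the last 50px of the chapter.
    print(effective_height(1050, 1000))  # -> 1050
    print(effective_height(-1, 1000))    # -> 1000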

View File

@@ -7,7 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, traceback, cStringIO, re, shutil
-from functools import partial

 from calibre.constants import DEBUG
 from calibre.utils.config import Config, StringConfig, tweaks
@@ -142,11 +141,19 @@ class SafeFormat(TemplateFormatter):
 def get_components(template, mi, id, timefmt='%b %Y', length=250,
         sanitize_func=ascii_filename, replace_whitespace=False,
         to_lowercase=False):
-    tsfmt = partial(title_sort, order=tweaks['save_template_title_series_sorting'])
+    tsorder = tweaks['save_template_title_series_sorting']
     format_args = FORMAT_ARGS.copy()
     format_args.update(mi.all_non_none_fields())
     if mi.title:
-        format_args['title'] = tsfmt(mi.title)
+        if tsorder == 'strictly_alphabetic':
+            v = mi.title
+        else:
+            # title_sort might be missing or empty. Check both conditions
+            v = mi.get('title_sort', None)
+            if not v:
+                v = title_sort(mi.title, order=tsorder)
+        format_args['title'] = v
     if mi.authors:
         format_args['authors'] = mi.format_authors()
         format_args['author'] = format_args['authors']
@@ -157,7 +164,7 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
     else:
         format_args['tags'] = ''
     if mi.series:
-        format_args['series'] = tsfmt(mi.series)
+        format_args['series'] = title_sort(mi.series, order=tsorder)
         if mi.series_index is not None:
             format_args['series_index'] = mi.format_series_index()
     else:
@@ -176,7 +183,7 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
             cm = custom_metadata[key]
             ## TODO: NEWMETA: should ratings be divided by 2? The standard rating isn't...
             if cm['datatype'] == 'series':
-                format_args[key] = tsfmt(format_args[key])
+                format_args[key] = title_sort(format_args[key], order=tsorder)
                 if key+'_index' in format_args:
                     format_args[key+'_index'] = fmt_sidx(format_args[key+'_index'])
             elif cm['datatype'] == 'datetime':
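
Here tsorder is the save_template_title_series_sorting tweak: 'strictly_alphabetic' leaves the title as typed, while the default library ordering moves a leading article to the end, which is what title_sort does. A deliberately simplified sketch of that transformation (calibre's real title_sort handles more articles and languages; this reduced version is for illustration only):

    import re

    def title_sort_sketch(title, order='library_order'):
        # Hypothetical reduction of calibre's title_sort.
        if order == 'strictly_alphabetic':
            return title
        match = re.match(r'(A|An|The)\s+(.+)', title)
        if match:
            return '%s, %s' % (match.group(2), match.group(1))
        return title

    # title_sort_sketch('The Lord of the Rings') -> 'Lord of the Rings, The'
    # title_sort_sketch('The Lord of the Rings', 'strictly_alphabetic')
    #                                          -> 'The Lord of the Rings'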

View File

@@ -561,9 +561,10 @@ format, whether input or output are available in the conversion dialog under the
 Convert Microsoft Word documents
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| does not directly convert .doc files from Microsoft Word. However, in Word, you can save the document
+|app| does not directly convert .doc/.docx files from Microsoft Word. However, in Word, you can save the document
 as HTML and then convert the resulting HTML file with |app|. When saving as HTML, be sure to use the
-"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well.
+"Save as Web Page, Filtered" option as this will produce clean HTML that will convert well. Note that Word
+produces really messy HTML, converting it can take a long time, so be patient.

 There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
 generating the Table of Contents much simpler. It is called BookCreator and is available for free

[62 file diffs suppressed because they are too large to display]

View File

@@ -8,11 +8,13 @@ import re, htmlentitydefs
 _ascii_pat = None

 def clean_ascii_chars(txt, charlist=None):
-    'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
+    '''
+    Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
+    This is all control chars except \\t,\\n and \\r
+    '''
     global _ascii_pat
     if _ascii_pat is None:
-        chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
-                + [0x1A, 0x1B]
+        chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
         _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))

     if charlist is None:
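
The rewritten set is every C0 control character except tab (0x09), newline (0x0A) and carriage return (0x0D), which are the only controls XML 1.0 allows. One detail worth noting: range(0x0E, 0x1F) stops at 0x1E, so 0x1F itself still slips through despite the "14-31" in the docstring. A quick re-creation of the pattern the diff builds, runnable as-is (chr stands in for the unichr used above):

    import re

    chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
    pat = re.compile('|'.join(map(chr, chars)))

    # Tab, newline and carriage return survive; other controls are stripped.
    print(repr(pat.sub('', 'ok\ttab\x00\x1b end')))  # -> 'ok\ttab end'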

View File

@@ -13,6 +13,7 @@ from calibre.web.feeds.feedparser import parse
 from calibre.utils.logging import default_log
 from calibre import entity_to_unicode, strftime
 from calibre.utils.date import dt_factory, utcnow, local_tz
+from calibre.utils.cleantext import clean_ascii_chars

 class Article(object):

@@ -43,7 +44,7 @@ class Article(object):
                 print summary.encode('utf-8')
                 traceback.print_exc()
                 summary = u''
-        self.text_summary = summary
+        self.text_summary = clean_ascii_chars(summary)
         self.author = author
         self.content = content
         self.date = published
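
With that one import and call, every article summary is scrubbed before it can reach the XML serializer, which is what fixes the news-download parse failures noted in the changelog. A usage sketch, assuming a calibre environment is importable (the sample feed text is invented):

    from calibre.utils.cleantext import clean_ascii_chars

    # Raw feed summaries sometimes carry stray control bytes.
    summary = u'Service restored\x00 after a brief\x1b outage'
    print(repr(clean_ascii_chars(summary)))
    # -> u'Service restored after a brief outage'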