Sync to trunk.

2025-07-09 03:04:10 -04:00 · 2010-01-11 06:09:02 -05:00 · 2010-01-11 06:09:02 -05:00 · 5f89c14c19
commit 5f89c14c19
parent 5f5fd2a2a8 b4cff43ee2
37 changed files with 7359 additions and 6265 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -4,6 +4,98 @@
 # for important features/bug fixes.
 # Also, each release can have new and improved recipes.

+- version: 0.6.33
+  date: 2010-01-10
+
+  new features:
+    - title: "The e-book viewer now has built-in dictionary lookup"
+      type: major
+      description: >
+        "You can now right click on a word to look up its meaning in an online dictionary.
+        calibre uses the public domain dictionaries available at dict.org."
+
+    - title: "RTF Output: Add support for unicode characters"
+
+    - title: "Allow the metadata that is used to create collections when sending books to SONY readers to be customized"
+      description: >
+        "By default collections are created on the SONY reader corresponding to series and tags. Now you can add other
+        metadata fields, like author, or remove ones you dislike, by going to Preferences->Plugins and customizing
+        the device interface plugin corresponding to your device."
+
+    - title: "TXT Input: Add option to disable insertion of Table of Contents into output text."
+      tickets: [4506]
+
+    - title: "Remember state of cover and tag browsing views on restart"
+
+  bug fixes:
+    - title: "EPUB Output: Add id attributes to anchors that have only name, as Adobe Digital Editions apparently can't handle only name attributes"
+      tickets: [4474]
+
+    - title: "Conversion pipeline: Handle the list-style shortcut CSS property correctly"
+      tickets: [4418]
+
+    - title: "EPUB Output: Fix generation of comics with PNG images for the Nook"
+      tickets: [4492]
+
+    - title: "Fix bug that could prevent loading of some custom plugins"
+      tickets: [4414]
+
+    - title: "News download: Handle URLs with both commas and non-ASCII characters correctly"
+
+    - title: "Ignore invalid metadata when adding books from command line instead of erroring out"
+      tickets: [4496]
+
+    - title: Fix remove header/footer assistant when converting HTML files
+      tickets: [4484]
+
+    - title: "Workaround for browsers like iPhone Safari that send extra arguments when downloading books from the content server"
+
+    - title: "Content server: Recognize the HTC HD2 as a mobile browser and add series information to the mobile version of the web page."
+      tickets: [4488]
+
+    - title: "FB2 Output: Properly escape metadata before inserting it into the file"
+      
+    - title: "Don't accept rich text in the comments fields of the edit meta information dialog"
+
+    - title: "Fix device detection for Cybook gen 3 with firmware 2.0"
+
+    - title: "Send to device: Use default save template when driver specific one is empty or unspecified"
+
+    - title: "Fix framework for running post-process and preprocess file type plugins"
+
+    - title: "Linux develop/install commands: Use bindir instead of staging bindir in the launchers"
+      tickets: [4437]
+
+    - title: "E-book viewer: Sanitize file names when unzipping EPUB files"
+      tickets: [4426]
+
+
+  new recipes:
+    - title: The Escapist
+      author: Lorenzo Vigentini
+
+    - title: Washington Post cartoons
+      author: kwetal
+
+    - title: The Dallas Morning News
+      author: Krittika Goyal
+
+    - title: sg.hu
+      author: davotibarna
+
+    - title: The New Zealand Herald
+      author: Krittika Goyal
+
+    - title: Nature News
+      author: Krittika Goyal
+
+  improved recipes:
+    - El Pais
+    - The Economist
+    - The New York Times
+    - Entrepreneur Magazine
+    - CNN
+
 - version: 0.6.32
  date: 2010-01-03

--- a/resources/images/news/wapo_cartoons.png
+++ b/resources/images/news/wapo_cartoons.png
--- a/resources/recipes/corriere_della_sera_en.recipe
+++ b/resources/recipes/corriere_della_sera_en.recipe
@ -1,27 +1,35 @@
 #!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__author__    = 'Lorenzo Vigentini, based on Darko Miletic'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '10, January 2010'
+__description__ = 'Italian daily newspaper (english version)'
 '''
-www.corriere.it/english
+http://www.corriere.it/
 '''

 from calibre.web.feeds.news import BasicNewsRecipe

-class Corriere_en(BasicNewsRecipe):
-    title                 = 'Corriere della Sera in English'
-    __author__            = 'Darko Miletic'
-    description           = 'News from Milan and Italy'
-    oldest_article        = 15
-    publisher             = 'Corriere della Sera'
-    category              = 'news, politics, Italy'
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    use_embedded_content  = False
-    encoding              = 'cp1252'
-    remove_javascript     = True
-    language = 'en'
+class ilCorriere(BasicNewsRecipe):
+    __author__     = 'Lorenzo Vigentini, based on Darko Miletic'
+    description    = 'Italian daily newspaper (english version)'

+    cover_url      = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
+    title          = u'Il Corriere della sera (english) '
+    publisher      = 'RCS Digital'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'en'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript = True
+    no_stylesheets = True

    html2lrf_options = [
                          '--comment', description
@ -35,12 +43,13 @@ class Corriere_en(BasicNewsRecipe):
    keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]

    remove_tags = [
-                    dict(name=['base','object','link','embed','img'])
-                   ,dict(name='div', attrs={'class':'news-goback'})
-                   ,dict(name='ul', attrs={'class':'toolbar'})
+                   dict(name=['base','object','link','embed']),
+                   dict(name='div', attrs={'class':'news-goback'}),
+                   dict(name='ul', attrs={'class':'toolbar'})
                  ]

    remove_tags_after = dict(name='p', attrs={'class':'footnotes'})

-    feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')]
-
+    feeds = [
+             (u'News'  , u'http://www.corriere.it/rss/english.xml'  )
+            ]
--- a/resources/recipes/corriere_della_sera_it.recipe
+++ b/resources/recipes/corriere_della_sera_it.recipe
@ -1,26 +1,36 @@
 #!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini, based on Darko Miletic'
+__copyright__   = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '10, January 2010'
+__description__ = 'Italian daily newspaper'

-__license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
-www.corriere.it
+http://www.corriere.it/
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-class Corriere_it(BasicNewsRecipe):
-    title                 = 'Corriere della Sera'
-    __author__            = 'Darko Miletic'
-    description           = 'News from Milan and Italy'    
-    oldest_article        = 7
-    publisher             = 'Corriere della Sera'
-    category              = 'news, politics, Italy'        
-    max_articles_per_feed = 100
-    no_stylesheets        = True
-    use_embedded_content  = False
-    encoding              = 'cp1252'
-    remove_javascript     = True
-    language = 'it'

+class ilCorriere(BasicNewsRecipe):
+    __author__     = 'Lorenzo Vigentini, based on Darko Miletic'
+    description    = 'Italian daily newspaper'
+
+    cover_url      = 'http://images.corriereobjects.it/images/static/common/logo_home.gif?v=200709121520'
+    title          = u'Il Corriere della sera '
+    publisher      = 'RCS Digital'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'it'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article = 1
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript = True
+    no_stylesheets = True

    html2lrf_options = [
                          '--comment', description
@ -28,29 +38,30 @@ class Corriere_it(BasicNewsRecipe):
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]
-    
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]

    remove_tags = [
-                    dict(name=['base','object','link','embed','img'])
-                   ,dict(name='div', attrs={'class':'news-goback'})
-                   ,dict(name='ul', attrs={'class':'toolbar'})
+                   dict(name=['base','object','link','embed']),
+                   dict(name='div', attrs={'class':'news-goback'}),
+                   dict(name='ul', attrs={'class':'toolbar'})
                  ]

    remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
-    
-    feeds = [ 
-              (u'Ultimora'  , u'http://www.corriere.it/rss/ultimora.xml'  )
-             ,(u'Cronache'  , u'http://www.corriere.it/rss/cronache.xml'  )
-             ,(u'Economia'  , u'http://www.corriere.it/rss/economia.xml'  )
-             ,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml')
-             ,(u'Esteri'    , u'http://www.corriere.it/rss/esteri.xml'    )
-             ,(u'Politica'  , u'http://www.corriere.it/rss/politica.xml'  )
-             ,(u'Salute'    , u'http://www.corriere.it/rss/salute.xml'    )
-             ,(u'Scienze'   , u'http://www.corriere.it/rss/scienze.xml'   )
-             ,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml')
-             ,(u'Sport'     , u'http://www.corriere.it/rss/sport.xml'     )
-            ]

+    feeds = [
+             (u'Ultimora'  , u'http://www.corriere.it/rss/ultimora.xml'  ),
+             (u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml'),
+             (u'Cronache'  , u'http://www.corriere.it/rss/cronache.xml'  ),
+             (u'Politica'  , u'http://www.corriere.it/rss/politica.xml'  ),
+             (u'Esteri'    , u'http://www.corriere.it/rss/esteri.xml'    ),
+             (u'Economia'  , u'http://www.corriere.it/rss/economia.xml'  ),
+             (u'Cultura'    , u'http://www.corriere.it/rss/cultura.xml'  ),
+             (u'Scienze'   , u'http://www.corriere.it/rss/scienze.xml'   ),
+             (u'Salute'    , u'http://www.corriere.it/rss/salute.xml'    ),
+             (u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml'),
+             (u'Cinema e TV', u'http://www.corriere.it/rss/cinema.xml'   ),
+             (u'Sport'     , u'http://www.corriere.it/rss/sport.xml'     )
+            ]
--- a/resources/recipes/economist.recipe
+++ b/resources/recipes/economist.recipe
@ -106,7 +106,7 @@ class Economist(BasicNewsRecipe):
        return ans

    def eco_find_image_tables(self, soup):
-        for x in soup.findAll('table', align='right'):
+        for x in soup.findAll('table', align=['right', 'center']):
            if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
                yield x

--- a/resources/recipes/economist_free.recipe
+++ b/resources/recipes/economist_free.recipe
@ -107,7 +107,7 @@ class Economist(BasicNewsRecipe):
        self.log.debug(tb)

    def eco_find_image_tables(self, soup):
-        for x in soup.findAll('table', align='right'):
+        for x in soup.findAll('table', align=['right', 'center']):
            if len(x.findAll('font')) in (1,2) and len(x.findAll('img')) == 1:
                yield x

--- a/resources/recipes/el_pais.recipe
+++ b/resources/recipes/el_pais.recipe
@ -1,6 +1,8 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
+__author__    = 'Lorenzo Vigentini, based on earlier version by Kovid Goyal'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+description   = 'Main daily newspaper from Spain - v1.02 (10, January 2010)'
 __docformat__ = 'restructuredtext en'

 '''
@ -10,18 +12,54 @@ elpais.es
 from calibre.web.feeds.news import BasicNewsRecipe

 class ElPais(BasicNewsRecipe):
-    title          = u'EL PAIS'
-    language = 'es'
+    __author__        = 'Kovid Goyal & Lorenzo Vigentini'
+    description   = 'Main daily newspaper from Spain'

-    oldest_article = 7
-    max_articles_per_feed = 100
+    cover_url      = 'http://www.elpais.com/im/tit_logo_global.gif'
+    title          = u'El Pais'
+    publisher      = 'Ediciones El Pais SL'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'es'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article = 2
+    max_articles_per_feed = 15
+
+    use_embedded_content  = False
+    recursion             = 5
+
+    remove_javascript = True
    no_stylesheets = True

-    remove_tags    = [dict(name='div', attrs={'class':'zona_superior'}), dict(name='div', attrs={'class':'limpiar'}), dict(name='div', attrs={'id':'pie'})]
-    extra_css      = 'h1 {font: sans-serif large;} \n h2 {font: sans-serif medium;} \n h3 {font: sans-serif small;} \n h4 {font: sans-serif bold small;} \n p{ font:10pt serif}'
+    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','contenido_noticia','caja_despiece','presentacion']})]
+    extra_css      = '''
+                        p{style:normal size:12 serif}

-    feeds          = [(u'Internacional', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporint'), (u'Espana', u'http://www.elpais.es/rss/rss_section.html?anchor=elppornac'), (u'Deportes', u'http://www.elpais.es/rss/rss_section.html?anchor=elppordep'), (u'Economia', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporeco'), (u'Tecnologia', u'http://www.elpais.es/rss/rss_section.html?anchor=elpportec'), (u'Cultura', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporcul'), (u'Gente', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporgen'), (u'Sociedad', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporsoc'), (u'Opinion', u'http://www.elpais.es/rss/rss_section.html?anchor=elpporopi')]
+                    '''

-    def print_version(self, url):
-        url = url+'?print=1'
-        return url
+    remove_tags    = [
+                        dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
+                        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada']}),
+                        dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas']})
+                    ]
+
+    feeds          = [
+                        (u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
+                        (u'Internacional', u'http://www.elpais.com/rss/feed.html?feedId=1001'),
+                        (u'Espana', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
+                        (u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
+                        (u'Economia', u'http://www.elpais.com/rss/feed.html?feedId=1006'),
+                        (u'Politica', u'http://www.elpais.com/rss/feed.html?feedId=17073'),
+                        (u'Tecnologia', u'http://www.elpais.com/rss/feed.html?feedId=1005'),
+                        (u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
+                        (u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
+                        (u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
+                        (u'Opinion', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
+                        (u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
+                        (u'Justicia y leyes', u'http://www.elpais.com/rss/feed.html?feedId=17069'),
+                    ]
+
+    def print_version(self, url):
+        """Return the print-view address of ``url``; kept inside ElPais so it is a recipe method, not a stray module function."""
+        return url + '?print=1'
--- a/resources/recipes/l_espresso.recipe
+++ b/resources/recipes/l_espresso.recipe
@ -0,0 +1,67 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.02'
+__date__        = '10, January 2010'
+__description__ = 'Italian weekly magazine'
+
+'''espresso.repubblica.it'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class laGazzetta(BasicNewsRecipe):
+    """calibre news recipe for the Italian weekly magazine l'Espresso (espresso.repubblica.it)."""
+    # NOTE(review): the class is called laGazzetta, the same name used by the
+    # class in la_gazzeta_dello_sport.recipe -- presumably a copy-paste leftover.
+    __author__     = 'Lorenzo Vigentini'
+    description    = 'Italian weekly magazine'
+
+    # Metadata attached to the generated periodical.
+    cover_url      = 'http://espresso.repubblica.it/images/logo_espresso.gif'
+    title          = 'l Espresso '
+    publisher      = 'Gruppo editoriale lEspresso'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'it'
+    # Pages are decoded as cp1252 rather than whatever charset they declare.
+    encoding       = 'cp1252'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    # Download limits: article age (in days per the calibre recipe API) and
+    # the per-feed article cap.
+    oldest_article        = 16
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets = True
+
+
+    # (section title, feed URL) pairs; the last feed lives on a different host.
+    feeds          = [
+                       (u'Espresso Homepage', u'http://kpm.data.kataweb.it/kpm3eolx/rss/home'),
+                       (u'Espresso Local', u'http://kpm.data.kataweb.it/kpm3eolx/rss/local'),
+                       (u'Espresso Style & Design', u'http://kpm.data.kataweb.it/kpm3eolx/rss/style_design'),
+                       (u'Espresso Opinioni', u'http://kpm.data.kataweb.it/kpm3eolx/rss/opinioni'),
+                       (u'Espresso Rubriche', u'http://kpm.data.kataweb.it/kpm3eolx/rss/rubriche'),
+                       (u'Espresso Limes', u'http://temi.repubblica.it/limes/feed/')
+                    ]
+
+    def print_version(self,url):
+        """Return the address fetched instead of the feed's article ``url``."""
+        # NOTE(review): this appends '/&print=true' (a slash followed by '&',
+        # with no '?') -- confirm this is the URL shape the site expects.
+        return url + '/&print=true'
+
+    # Only these containers are kept from each page. The second entry repeats
+    # 'generic-articles' and 'detail-articles' from the first and adds 'summary'.
+    keep_only_tags     = [
+                            dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
+                            dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
+                            dict(name='div', attrs={'id':'content-second-right'})
+                          ]
+
+    # Elements stripped from the kept content: service boxes, navigation and
+    # header blocks, plus every script/noscript/iframe element.
+    remove_tags        = [
+                            dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
+                            dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left']}),
+                            dict(name=['script','noscript','iframe'])
+                         ]
+    # Stylesheet applied to the output: heading sizes h1-h5 plus the 'firma'
+    # and 'testo' classes that keep_only_tags retains.
+    extra_css = '''
+                h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
+                h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
+                .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
+                '''
+
--- a/resources/recipes/la_gazzeta_dello_sport.recipe
+++ b/resources/recipes/la_gazzeta_dello_sport.recipe
@ -0,0 +1,79 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.02'
+__date__        = '10, January 2010'
+__description__ = 'Sport news from the most read sport newspaper in Italy'
+
+'''www.gazzetta.it'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class laGazzetta(BasicNewsRecipe):
+    """calibre news recipe for La Gazzetta dello Sport (www.gazzetta.it)."""
+    __author__        = 'Lorenzo Vigentini'
+    description   = 'Sport news from the most read sport newspaper in Italy'
+
+    # Metadata attached to the generated periodical.
+    cover_url      = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'
+    title          = 'La Gazzetta dello Sport '
+    publisher      = 'RCS Digital'
+    category       = 'Sport News'
+
+    language       = 'it'
+    # Pages are decoded as cp1252 rather than whatever charset they declare.
+    encoding       = 'cp1252'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    # Download limits: article age (in days per the calibre recipe API) and
+    # the per-feed article cap.
+    oldest_article = 2
+    max_articles_per_feed = 20
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript = True
+    no_stylesheets = True
+
+    # Only the article container is kept from each page.
+    keep_only_tags = [ dict(name='div', attrs={'id':'articolo'})]
+
+    # Elements stripped from the kept content. The final entry removes every
+    # 'a' element, which also covers the two id/class-specific 'a' rules above it.
+    remove_tags = [
+                dict(name='ul',attrs={'id':['service-toolbar','sections-menu']}),
+                dict(name='div',attrs={'id':['header','rightcol','sponsored','vxFlashPlayer','footer','print-box']}),
+                dict(name='iframe',attrs={'id':'mirago-feed'}),
+                dict(name='a',attrs={'id':'commenta-up'}),
+                dict(name='cite',attrs={'class':['signature','parag-title']}),
+                dict(name='a',attrs={'class':['last-comment','button-bold2']}),
+                dict(name=['base','object','link','a','script','noscript'])
+            ]
+
+    extra_css      = '''
+                        h1 {font: sans-serif large;}
+                        h2 {font: sans-serif medium;}
+                        h3 {font: sans-serif small;}
+                        h4 {font: sans-serif bold small;}
+                        p  {font:10pt helvetica}
+                        dd {font:8pt helvetica}
+                      '''
+
+    # (section title, feed URL) pairs, one per sport section.
+    feeds       = [
+                   (u'Calcio',u'http://www.gazzetta.it/rss/Calcio.xml'),
+                   (u'Formula 1',u'http://www.gazzetta.it/rss/Formula1.xml'),
+                   (u'Motomondiale',u'http://www.gazzetta.it/rss/Motomondiale.xml'),
+                   (u'Motori',u'http://www.gazzetta.it/rss/Motori.xml'),
+                   (u'Ciclismo',u'http://www.gazzetta.it/rss/Ciclismo.xml'),
+                   (u'Basket',u'http://www.gazzetta.it/rss/Basket.xml'),
+                   (u'Tennis',u'http://www.gazzetta.it/rss/Tennis.xml'),
+                   (u'Pallavolo',u'http://www.gazzetta.it/rss/Pallavolo.xml'),
+                   (u'Vela',u'http://www.gazzetta.it/rss/Vela.xml'),
+                   (u'Atletica',u'http://www.gazzetta.it/rss/Atletica.xml'),
+                   (u'Altri Sport',u'http://www.gazzetta.it/rss/Sport_Vari.xml')
+                 ]
+
+    def print_version(self,url):
+        """Rewrite an article ``url`` to its ``<name>_print.html`` variant.
+
+        The result is scheme and host, then the first four path segments,
+        then the article name with ``_print.html`` appended. The article name
+        is the last segment minus its final six characters, cut at the first
+        dot.
+        """
+        # NOTE(review): assumes the article file sits below exactly four path
+        # segments and ends in a six-character suffix such as '.shtml' -- confirm.
+        segments = url.split('/')
+        basename = '/'.join(segments[:3])+'/'
+        subPath= '/'.join(segments[3:7])+'/'
+        myArticle = segments[-1][:-6].split('.')[0]
+        # Built once and returned directly; the leftover debug print is gone.
+        return basename + subPath + myArticle + '_print.html'
--- a/resources/recipes/la_republica.recipe
+++ b/resources/recipes/la_republica.recipe
@ -1,29 +1,55 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__author__    = 'Lorenzo Vigentini, based on Darko Miletic'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
+description   = 'Italian daily newspaper - v1.01 (04, January 2010)'
+
+'''
+http://www.repubblica.it/
+'''
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class LaRepublica(BasicNewsRecipe):
-    title          = u'la Repubblica'
-    oldest_article = 1
-    language = 'it'
+    author        = 'Lorenzo Vigentini, based on Darko Miletic'
+    description   = 'Italian daily newspaper'

-    author = 'Darko Miletic'
+    cover_url      = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif'
+    title          = u'La Repubblica'
+    publisher      = 'Gruppo editoriale L\'Espresso'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'it'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article = 1
    max_articles_per_feed = 100
+    use_embedded_content  = False
+    recursion             = 10
+
    remove_javascript = True
-    no_stylesheets = True
-    
+
    keep_only_tags     = [dict(name='div', attrs={'class':'articolo'})]

-
    remove_tags        = [
-                            dict(name=['object','link'])
-                           ,dict(name='span',attrs={'class':'linkindice'})
-                           ,dict(name='div',attrs={'class':'bottom-mobile'})
-                           ,dict(name='div',attrs={'id':['rssdiv','blocco']})
+                            dict(name=['object','link']),
+                            dict(name='span',attrs={'class':'linkindice'}),
+                            dict(name='div',attrs={'class':'bottom-mobile'}),
+                            dict(name='div',attrs={'id':['rssdiv','blocco']})
                         ]
-    
+
    feeds          = [
-                       (u'Repubblica homepage', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
+                       (u'Repubblica Rilievo', u'http://www.repubblica.it/rss/homepage/rss2.0.xml'),
+                       (u'Repubblica Cronaca', u'http://www.repubblica.it/rss/cronaca/rss2.0.xml'),
+                       (u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml'),
+                       (u'Repubblica Economia', u'http://www.repubblica.it/rss/economia/rss2.0.xml'),
+                       (u'Repubblica Politica', u'http://www.repubblica.it/rss/politica/rss2.0.xml'),
                       (u'Repubblica Scienze', u'http://www.repubblica.it/rss/scienze/rss2.0.xml'),
                       (u'Repubblica Tecnologia', u'http://www.repubblica.it/rss/tecnologia/rss2.0.xml'),
-                       (u'Repubblica Esteri', u'http://www.repubblica.it/rss/esteri/rss2.0.xml')
+                       (u'Repubblica Scuola e Universita', u'http://www.repubblica.it/rss/scuola_e_universita/rss2.0.xml'),
+                       (u'Repubblica Ambiente', u'http://www.repubblica.it/rss/ambiente/rss2.0.xml'),
+		       (u'Repubblica Cultura', u'http://www.repubblica.it/rss/spettacoli_e_cultura/rss2.0.xml'),
+		       (u'Repubblica Persone', u'http://www.repubblica.it/rss/persone/rss2.0.xml'),
+		       (u'Repubblica Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
+		       (u'Repubblica Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml')
                     ]
-
--- a/resources/recipes/nzz_ger.recipe
+++ b/resources/recipes/nzz_ger.recipe
@ -1,4 +1,3 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
@ -20,29 +19,25 @@ class Nzz(BasicNewsRecipe):
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
-    lang                  = 'de-CH'
-    language = 'de'
+    language              = 'de'

-
-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+    conversion_options = {
+                             'comments'  : description
+                            ,'tags'      : category
+                            ,'language'  : language
+                            ,'publisher' : publisher
+                         }

    keep_only_tags = [dict(name='div', attrs={'class':'article'})]

    remove_tags = [
-                     dict(name=['object','link','base','script'])
+                     dict(name=['object','link','base'])
                    ,dict(name='div',attrs={'class':['more','teaser','advXertXoriXals','legal']})
                    ,dict(name='div',attrs={'id':['popup-src','readercomments','google-ad','advXertXoriXals']})
                  ]

    feeds = [
-               (u'Neuste Artikel', u'http://www.nzz.ch/feeds/recent/'                     )
-              ,(u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true')
+               (u'International' , u'http://www.nzz.ch/nachrichten/international?rss=true')
              ,(u'Schweiz'       , u'http://www.nzz.ch/nachrichten/schweiz?rss=true')
              ,(u'Wirtschaft'    , u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true')
              ,(u'Finanzmaerkte' , u'http://www.nzz.ch/finanzen/nachrichten?rss=true')
@ -55,13 +50,7 @@ class Nzz(BasicNewsRecipe):
              ,(u'Reisen'        , u'http://www.nzz.ch/magazin/reisen?rss=true')
            ]

-    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang']     = self.lang
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
-        soup.head.insert(0,mtag)
-        return soup
-
    def print_version(self, url):
        return url + '?printview=true'

+
--- a/resources/recipes/panorama.recipe
+++ b/resources/recipes/panorama.recipe
@ -0,0 +1,51 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '10, January 2010'
+__description__ = 'Italian weekly magazine'
+
+'''
+http://www.panorama.it/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class panorama(BasicNewsRecipe):
+    """calibre news recipe for the Italian weekly magazine Panorama (www.panorama.it)."""
+    __author__        = 'Lorenzo Vigentini, based on Darko Miletic'
+    description   = 'Italian weekly magazine'
+
+    # Metadata attached to the generated periodical.
+    cover_url      = 'http://www.panorama.it/panorama/images/panorama_large.gif'
+    title          = u'Panorama '
+    publisher      = 'Mondadori'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'it'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    # Download limits: article age (in days per the calibre recipe API) and
+    # the per-feed article cap.
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript = True
+
+    # Only div elements with class 'post' or 'article' are kept from each page.
+    keep_only_tags     = [dict(name='div', attrs={'class':['post','article']})]
+
+    # Elements stripped from the kept content: embedded objects, link
+    # elements, and the listed metadata/sharing/related/comment blocks.
+    remove_tags        = [
+                            dict(name=['object','link']),
+                            dict(name='div',attrs={'class':['post-meta','sharing-tools','related','comments','prev-next']}),
+                            dict(name='div',attrs={'id':['related-posts','footer']})
+                         ]
+
+    # (section title, feed URL) pairs, all served from blog.panorama.it.
+    feeds          = [
+                       (u'Panorama Italia', u'http://blog.panorama.it/italia/feed'),
+                       (u'Panorama Mondo', u'http://blog.panorama.it/mondo/feed'),
+                       (u'Panorama Cultura e societa', u'http://blog.panorama.it/culturaesocieta/feed'),
+                       (u'Panorama Hitech e scienza', u'http://blog.panorama.it/hitechescienza/feed'),
+                       (u'Panorama Motori', u'http://blog.panorama.it/autoemoto/feed'),
+                       (u'Panorama libri', u'http://blog.panorama.it/libri/feed'),
+                       (u'Panorama Opinioni', u'http://blog.panorama.it/opinioni/feed'),
+
+                     ]
--- a/resources/recipes/quotidiano.recipe
+++ b/resources/recipes/quotidiano.recipe
@ -0,0 +1,52 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__author__    = 'Lorenzo Vigentini'
+__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '10, January 2010'
+__description__ = 'Italian News Agency'
+
+'''
+http://www.quotidianonet.ilsole24ore.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class panorama(BasicNewsRecipe):
+    __author__     = 'Lorenzo Vigentini, based on Darko Miletic'
+    description    = 'Italian News Agency'
+
+    cover_url      = 'http://quotidianonet.ilsole24ore.com/file_generali/img/logo_quotidianonet-top.gif'
+    title          = u'Quotidiano Net '
+    publisher      = 'italiaNews'
+    category       = 'News, politics, culture, economy, general interest'
+
+    language       = 'it'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript = True
+
+    keep_only_tags     = [dict(name='div', attrs={'class':'box_contenuto articolo'})]
+
+    remove_tags        = [
+                            dict(name=['object','link']),
+                            dict(name='div',attrs={'class':['post-meta','sharing-tools','related','comments','prev-next','box_contenuto adsense']}),
+                            dict(name='div',attrs={'id':['strumenti','related-posts','footer','inline_boxes','inline_boxes_header','inline_boxes_body','bottom']}),
+                            dict(name='span',attrs={'class':'titolosezione default'})
+                         ]
+
+    feeds          = [
+                       (u'Prima pagina', u'http://quotidianonet.ilsole24ore.com/rss/home.xml'),
+                       (u'Cronaca', u'http://quotidianonet.ilsole24ore.com/rss/cronaca.xml'),
+                       (u'Economia', u'http://quotidianonet.ilsole24ore.com/rss/economia.xml'),
+                       (u'Esteri', u'http://quotidianonet.ilsole24ore.com/rss/esteri.xml'),
+                       (u'Politica', u'http://quotidianonet.ilsole24ore.com/rss/politica.xml'),
+                       (u'Salute', u'http://quotidianonet.ilsole24ore.com/rss/salute.xml'),
+                       (u'Tecnologia', u'http://quotidianonet.ilsole24ore.com/rss/tecnologia.xml'),
+
+                     ]
--- a/resources/recipes/starwars.recipe
+++ b/resources/recipes/starwars.recipe
@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class TheForce(BasicNewsRecipe):
+    title          = u'The Force'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    encoding = 'cp1252'
+
+    remove_stylesheets = True
+    #remove_javascripts = True
+    conversion_options = { 'linearize_tables' : True }
+    remove_tags_after= dict(name='div', attrs={'class':'KonaBody'})
+    keep_only_tags = dict(name='td', attrs={'background':'/images/span/tile_story_bgtile.gif'})
+    #keep_only_tags = dict(name='div', attrs={'class':'KonaBody'})
+    remove_tags = [
+       dict(name='iframe'),
+       #dict(name='div', attrs={'class':['pt-box-title', 'pt-box-content', 'blog-entry-footer', 'item-list', 'article-sub-meta']}),
+       #dict(name='div', attrs={'id':['block-td_search_160', 'block-cam_search_160']}),
+       #dict(name='table', attrs={'cellspacing':'0'}),
+       #dict(name='ul', attrs={'class':'articleTools'}),	
+    ]
+
+    feeds          = [
+('The Force', 
+ 'http://www.theforce.net/outnews/tfnrdf.xml'),
+]
+
+    def preprocess_html(self, soup):
+        for tag in soup.findAll(name='i'):
+            if 'Remember to join the Star Wars Insider Facebook' in self.tag_to_string(tag):
+               for x in tag.findAllNext():
+                   x.extract()
+               tag.extract()
+               break
+        tag = soup.find(attrs={'class':'articleoption'})
+        if tag is not None:
+            tag = tag.findParent('table')
+            if tag is not None:
+                for x in tag.findAllNext():
+                    x.extract()
+            tag.extract()
+
+        for img in soup.findAll('img', src=True):
+            a = img.findParent('a', href=True)
+            if a is None: continue
+            url = a.get('href').split('?')[-1].partition('=')[-1]
+            if url:
+                img.extract()
+                a.name = 'img'
+                a['src'] = url
+                del a['href']
+                img['src'] = url
+        return soup
--- a/resources/recipes/wapo_cartoons.recipe
+++ b/resources/recipes/wapo_cartoons.recipe
@ -0,0 +1,145 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from datetime import date, timedelta
+
+class WaPoCartoonsRecipe(BasicNewsRecipe):
+    __license__   = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'en'
+    version = 2
+
+    title = u'Washington Post Cartoons'
+    publisher = u'Washington Post'
+    category = u'News, Cartoons'
+    description = u'Cartoons from the Washington Post'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    no_stylesheets = True
+
+    feeds = []
+    feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'))
+    feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/'))
+    feeds.append((u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'))
+    feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/'))
+    feeds.append((u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'))
+    feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/'))
+    feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'))
+    feeds.append((u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'))
+    feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'))
+
+    extra_css = '''
+                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
+                h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
+                #name {margin-bottom: 0.2em}
+                #copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
+                '''
+
+    def parse_index(self):
+        index = []
+        oldestDate = date.today() - timedelta(days = self.oldest_article)
+        oldest = oldestDate.strftime('%Y%m%d')
+        for feed in self.feeds:
+            cartoons = []
+            soup = self.index_to_soup(feed[1])
+
+            cartoon = {'title': 'Current', 'date': None, 'url': feed[1], 'description' : ''}
+            cartoons.append(cartoon)
+
+            select = soup.find('select', attrs = {'name': ['url', 'dest']})
+            if select:
+                cartoonCandidates = []
+                if select['name'] == 'url':
+                    cartoonCandidates = self.cartoonCandidatesWaPo(select, oldest)
+                else:
+                    cartoonCandidates = self.cartoonCandidatesCreatorsCom(select, oldest)
+
+                for cartoon in cartoonCandidates:
+                    cartoons.append(cartoon)
+
+            index.append([feed[0], cartoons])
+
+        return index
+
+    def preprocess_html(self, soup):
+        freshSoup = self.getFreshSoup(soup)
+
+        div = soup.find('div', attrs = {'id': 'name'})
+        if div:
+            freshSoup.body.append(div)
+            comic = soup.find('div', attrs = {'id': 'comic_full'})
+
+            img = comic.find('img')
+            if '&' in img['src']:
+                img['src'], sep, bad = img['src'].rpartition('&')
+
+            freshSoup.body.append(comic)
+            freshSoup.body.append(soup.find('div', attrs = {'id': 'copyright'}))
+        else:
+            span = soup.find('span', attrs = {'class': 'title'})
+            if span:
+                del span['class']
+                span['id'] = 'name'
+                span.name = 'div'
+                freshSoup.body.append(span)
+
+            img = soup.find('img', attrs = {'class': 'pic_big'})
+            if img:
+                td = img.parent
+                if td.has_key('style'):
+                    del td['style']
+                td.name = 'div'
+                td['id'] = 'comic_full'
+                freshSoup.body.append(td)
+
+            td = soup.find('td', attrs = {'class': 'copy'})
+            if td:
+                for a in td.find('a'):
+                    a.extract()
+                del td['class']
+                td['id'] = 'copyright'
+                td.name = 'div'
+                freshSoup.body.append(td)
+
+        return freshSoup
+
+    def getFreshSoup(self, oldSoup):
+        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
+        if oldSoup.head.title:
+            freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
+        return freshSoup
+
+    def cartoonCandidatesWaPo(self, select, oldest):
+        opts = select.findAll('option')
+        for i in range(1, len(opts)):
+            url = opts[i]['value'].rstrip('/')
+            dateparts = url.split('/')[-3:]
+            datenum = str(dateparts[0]) + str(dateparts[1]) + str(dateparts[2])
+            if datenum >= oldest:
+                yield {'title': self.tag_to_string(opts[i]), 'date': None, 'url': url, 'description': ''}
+            else:
+                return
+
+    def cartoonCandidatesCreatorsCom(self, select, oldest):
+        monthNames = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05',
+                      'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
+                      'November': '11', 'December': '12'}
+
+        opts = select.findAll('option')
+        for i in range(1, len(opts)):
+            if opts[i].has_key('selected'):
+                continue
+
+            dateString = self.tag_to_string(opts[i])
+            rest, sep, year = dateString.rpartition(', ')
+            parts = rest.split(' ')
+            day = parts[2].rjust(2, '0')
+            month = monthNames[parts[1]]
+            datenum = str(year) + month + str(day)
+            if datenum >= oldest:
+                yield {'title': dateString, 'date': None, 'url': opts[i]['value'], 'description': ''}
+            else:
+                return
+
+
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.6.32'
+__version__   = '0.6.33'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -264,6 +264,11 @@ class EPUBOutput(OutputFormatPlugin):
            if body:
                body = body[0]

+            # Add id attribute to <a> tags that have name
+            for x in XPath('//h:a[@name]')(body):
+                if not x.get('id', False):
+                    x.set('id', x.get('name'))
+
            # Replace <br> that are children of <body> as ADE doesn't handle them
            if hasattr(body, 'xpath'):
                for br in XPath('./h:br')(body):
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -237,6 +237,8 @@ class Stylizer(object):
                style.update(self._normalize_edge(prop.cssValue, name))
            elif name == 'font':
                style.update(self._normalize_font(prop.cssValue))
+            elif name == 'list-style':
+                style.update(self._normalize_list_style(prop.cssValue))
            else:
                style[name] = prop.value
        if 'font-size' in style:
@ -269,6 +271,31 @@ class Stylizer(object):
            style["%s-%s" % (name, edge)] = value
        return style

+    def _normalize_list_style(self, cssvalue):
+        composition = ('list-style-type', 'list-style-position',
+                       'list-style-image')
+        style = {}
+        if cssvalue.cssText == 'inherit':
+            for key in composition:
+                style[key] = 'inherit'
+        else:
+            try:
+                primitives = [v.cssText for v in cssvalue]
+            except TypeError:
+                primitives = [cssvalue.cssText]
+            primitives.reverse()
+            value = primitives.pop()
+            for key in composition:
+                if cssprofiles.validate(key, value):
+                    style[key] = value
+                    if not primitives: break
+                    value = primitives.pop()
+            for key in composition:
+                if key not in style:
+                    style[key] = DEFAULTS[key]
+
+        return style
+
    def _normalize_font(self, cssvalue):
        composition = ('font-style', 'font-variant', 'font-weight',
                       'font-size', 'line-height', 'font-family')
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
@ -4,9 +4,9 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: calibre 0.6.32\n"
-"POT-Creation-Date: 2010-01-09 10:18+MST\n"
-"PO-Revision-Date: 2010-01-09 10:18+MST\n"
+"Project-Id-Version: calibre 0.6.33\n"
+"POT-Creation-Date: 2010-01-10 16:40+MST\n"
+"PO-Revision-Date: 2010-01-10 16:40+MST\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@ -123,11 +123,11 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/library/database.py:913
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:703
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:715
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1135
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1172
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1509
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1511
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1622
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1143
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1180
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1517
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1519
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1630
 #: /home/kovid/work/calibre/src/calibre/library/server.py:645
 #: /home/kovid/work/calibre/src/calibre/library/server.py:717
 #: /home/kovid/work/calibre/src/calibre/library/server.py:764
@ -585,7 +585,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/gui2/tag_view.py:132
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1068
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1072
-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1409
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1417
 msgid "News"
 msgstr ""

@ -1955,6 +1955,10 @@ msgstr ""
 msgid "Run the text input through the markdown pre-processor. To learn more about markdown see"
 msgstr ""

+#: /home/kovid/work/calibre/src/calibre/ebooks/txt/input.py:35
+msgid "Do not insert a Table of Contents into the output text."
+msgstr ""
+
 #: /home/kovid/work/calibre/src/calibre/ebooks/txt/output.py:24
 msgid "Type of newline to use. Options are %s. Default is 'system'. Use 'old_mac' for compatibility with Mac OS 9 and earlier. For Mac OS X use 'unix'. 'system' will default to the newline type used by this OS."
 msgstr ""
@ -2203,7 +2207,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/rb_output_ui.py:28
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/structure_detection_ui.py:59
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/toc_ui.py:62
-#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:38
+#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:42
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output_ui.py:45
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/xexp_edit_ui.py:49
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/xpath_wizard_ui.py:67
@ -2752,12 +2756,12 @@ msgid "PDB Input"
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:32
-#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:39
+#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:43
 msgid "Treat each &line as a paragraph"
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/gui2/convert/pdb_input_ui.py:33
-#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:42
+#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:44
 msgid "Assume print formatting"
 msgstr ""

@ -2975,14 +2979,18 @@ msgstr ""
 msgid "TXT Input"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:40
+#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:45
 msgid "Process using markdown"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:41
+#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:46
 msgid "<p>Markdown is a simple markup language for text files, that allows for advanced formatting. To learn more visit <a href=\"http://daringfireball.net/projects/markdown\">markdown</a>."
 msgstr ""

+#: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_input_ui.py:47
+msgid "Do not insert Table of Contents into output text when using markdown"
+msgstr ""
+
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/txt_output.py:16
 msgid "TXT Output"
 msgstr ""
@ -6639,27 +6647,27 @@ msgid ""
 "For help on an individual command: %%prog command --help\n"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1648
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1656
 msgid "<p>Migrating old database to ebook library in %s<br><center>"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1677
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1685
 msgid "Copying <b>%s</b>"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1694
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1702
 msgid "Compacting database"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1787
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1795
 msgid "Checking SQL integrity..."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1824
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1832
 msgid "Checking for missing files."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/library/database2.py:1846
+#: /home/kovid/work/calibre/src/calibre/library/database2.py:1854
 msgid "Checked id"
 msgstr ""

--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/eo.po
+++ b/src/calibre/translations/eo.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/ja.po
+++ b/src/calibre/translations/ja.po
--- a/src/calibre/translations/lv.po
+++ b/src/calibre/translations/lv.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sq.po
+++ b/src/calibre/translations/sq.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/translations/zh_TW.po
+++ b/src/calibre/translations/zh_TW.po
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@ -158,7 +158,7 @@ class RecursiveFetcher(object):
                pass

        def remove_beyond(tag, next):
-            while tag is not None and tag.name != 'body':
+            while tag is not None and getattr(tag, 'name', None) != 'body':
                after = getattr(tag, next)
                while after is not None:
                    ns = getattr(tag, next)