Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

Merge from trunk

This commit is contained in: commit fdb842b036
BIN  resources/images/news/latimes.png  (new file, 358 B)
Binary file not shown.
@@ -1,25 +1,25 @@
-# -*- coding: utf-8
 __license__ = 'GPL v3'
 __author__ = 'Luis Hernandez'
 __copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
-description = 'Periódico gratuito en español - v0.8 - 27 Jan 2011'
+__version__ = 'v0.85'
+__date__ = '31 January 2011'
 
 '''
 www.20minutos.es
 '''
 
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1294946868(BasicNewsRecipe):
 
-    title = u'20 Minutos'
+    title = u'20 Minutos new'
     publisher = u'Grupo 20 Minutos'
 
-    __author__ = 'Luis Hernández'
-    description = 'Periódico gratuito en español'
+    __author__ = 'Luis Hernandez'
+    description = 'Free spanish newspaper'
     cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
 
-    oldest_article = 5
+    oldest_article = 2
     max_articles_per_feed = 100
 
     remove_javascript = True
@@ -29,6 +29,7 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
     encoding = 'ISO-8859-1'
     language = 'es'
     timefmt = '[%a, %d %b, %Y]'
+    remove_empty_feeds = True
 
     keep_only_tags = [
        dict(name='div', attrs={'id':['content','vinetas',]})
@@ -43,13 +44,21 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
     remove_tags = [
        dict(name='ol', attrs={'class':['navigation',]})
        ,dict(name='span', attrs={'class':['action']})
-       ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
+       ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col','photo-gallery','photo-gallery side-art-block','calendario','article-comment','postto estirar','otras_vinetas estirar','kment','user-actions']})
        ,dict(name='div', attrs={'id':['twitter-destacados','eco-tabs','inner','vineta_calendario','vinetistas clearfix','otras_vinetas estirar','MIN1','main','SUP1','INT']})
        ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
        ,dict(name='ul', attrs={'id':['site-links']})
        ,dict(name='li', attrs={'class':['puntuacion','enviar','compartir']})
     ]
 
+    extra_css = """
+        p{text-align: justify; font-size: 100%}
+        body{ text-align: left; font-size:100% }
+        h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
+        """
 
+    preprocess_regexps = [(re.compile(r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
 
     feeds = [
        (u'Portada' , u'http://www.20minutos.es/rss/')
        ,(u'Nacional' , u'http://www.20minutos.es/rss/nacional/')
@@ -65,6 +74,6 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
        ,(u'Empleo' , u'http://www.20minutos.es/rss/empleo/')
        ,(u'Cine' , u'http://www.20minutos.es/rss/cine/')
        ,(u'Musica' , u'http://www.20minutos.es/rss/musica/')
        ,(u'Vinetas' , u'http://www.20minutos.es/rss/vinetas/')
        ,(u'Comunidad20' , u'http://www.20minutos.es/rss/zona20/')
     ]
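For readers unfamiliar with calibre recipes: a news recipe is just a BasicNewsRecipe subclass, and the pattern in the diff above is the whole extraction pipeline — keep_only_tags selects the article container, remove_tags prunes widgets inside it, and preprocess_regexps rewrites the raw HTML before parsing. A minimal sketch (the class name, feed URL and selectors here are illustrative, not from this commit):

    import re
    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = u'Example Paper'
        oldest_article = 2              # skip articles older than 2 days
        max_articles_per_feed = 100
        # keep only the main article container ...
        keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
        # ... then strip sharing widgets that live inside it
        remove_tags = [dict(name='div', attrs={'class': 'user-actions'})]
        # rewrite the raw HTML before it is parsed at all
        preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
        feeds = [(u'Front page', u'http://example.com/rss/')]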
resources/recipes/cinco_dias.recipe — new file (71 lines)
@@ -0,0 +1,71 @@
__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
__version__ = 'v1.2'
__date__ = '31 January 2011'

'''
http://www.cincodias.com/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1294946868(BasicNewsRecipe):

    title = u'Cinco Dias'
    publisher = u'Grupo Prisa'

    __author__ = 'Luis Hernandez'
    description = 'Spanish website about money and business, free edition'

    cover_url = 'http://www.prisa.com/images/logos/logo_cinco_dias.gif'
    oldest_article = 2
    max_articles_per_feed = 100

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    language = 'es'
    remove_empty_feeds = True
    encoding = 'ISO-8859-1'
    timefmt = '[%a, %d %b, %Y]'

    keep_only_tags = [
       dict(name='div', attrs={'class':['cab_articulo cab_noticia','pos_3','txt_noticia','mod_despiece']})
       ,dict(name='p', attrs={'class':['cintillo']})
    ]

    remove_tags_before = dict(name='div' , attrs={'class':['publi_h']})
    remove_tags_after = dict(name='div' , attrs={'class':['tab_util util_estadisticas']})

    remove_tags = [
       dict(name='div', attrs={'class':['util-1','util-2','util-3','inner estirar','inner1','inner2','inner3','cont','tab_util util_estadisticas','tab_util util_enviar','mod_list_inf','mod_similares','mod_divisas','mod_sectores','mod_termometro','mod post','mod_img','mod_txt','nivel estirar','barra estirar','info_brujula btnBrujula','utilidad_brujula estirar']})
       ,dict(name='li', attrs={'class':['lnk-fcbook','lnk-retweet','lnk-meneame','desplegable','comentarios','list-options','estirar']})
       ,dict(name='ul', attrs={'class':['lista-izquierda','list-options','estirar']})
       ,dict(name='p', attrs={'class':['autor']})
    ]

    extra_css = """
        p{text-align: justify; font-size: 100%}
        body{ text-align: left; font-size:100% }
        h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
        h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; }
        """

    feeds = [
       (u'Ultima Hora' , u'http://www.cincodias.com/rss/feed.html?feedId=17029')
       ,(u'Empresas' , u'http://www.cincodias.com/rss/feed.html?feedId=19')
       ,(u'Mercados' , u'http://www.cincodias.com/rss/feed.html?feedId=20')
       ,(u'Economia' , u'http://www.cincodias.com/rss/feed.html?feedId=21')
       ,(u'Tecnorama' , u'http://www.cincodias.com/rss/feed.html?feedId=17230')
       ,(u'Tecnologia' , u'http://www.cincodias.com/rss/feed.html?feedId=17106')
       ,(u'Finanzas Personales' , u'http://www.cincodias.com/rss/feed.html?feedId=22')
       ,(u'Fiscalidad' , u'http://www.cincodias.com/rss/feed.html?feedId=17107')
       ,(u'Vivienda' , u'http://www.cincodias.com/rss/feed.html?feedId=17108')
       ,(u'Tendencias' , u'http://www.cincodias.com/rss/feed.html?feedId=17109')
       ,(u'Empleo' , u'http://www.cincodias.com/rss/feed.html?feedId=17110')
       ,(u'IBEX 35' , u'http://www.cincodias.com/rss/feed.html?feedId=17125')
       ,(u'Sectores' , u'http://www.cincodias.com/rss/feed.html?feedId=17126')
       ,(u'Opinion' , u'http://www.cincodias.com/rss/feed.html?feedId=17105')
    ]
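A recipe like this can be smoke-tested from the command line before it is committed; calibre's ebook-convert accepts a .recipe file directly (something like: ebook-convert cinco_dias.recipe cinco_dias.epub --test — the --test flag, in versions that support it, fetches only a couple of articles per feed).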
@@ -1,73 +1,92 @@
-#!/usr/bin/env python
 
 __license__ = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
-latimes.com
+www.latimes.com
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class LATimes(BasicNewsRecipe):
-    title = u'The Los Angeles Times'
-    __author__ = u'Darko Miletic and Sujata Raman'
-    description = u'News from Los Angeles'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    language = 'en'
+    title = 'Los Angeles Times'
+    __author__ = 'Darko Miletic'
+    description = 'The Los Angeles Times is a leading source of news on Southern California, entertainment, movies, television, music, politics, business, health, technology, travel, sports, environment, economics, autos, jobs, real estate and other topics affecting California'
+    publisher = 'Tribune Company'
+    category = 'news, politics, USA, Los Angeles, world'
+    oldest_article = 2
+    max_articles_per_feed = 200
     no_stylesheets = True
+    encoding = 'utf8'
     use_embedded_content = False
-    encoding = 'utf-8'
-    lang = 'en-US'
+    language = 'en'
+    remove_empty_feeds = True
+    publication_type = 'newspaper'
+    masthead_url = 'http://www.latimes.com/images/logo.png'
+    cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
+    extra_css = """
+        body{font-family: Georgia,"Times New Roman",Times,serif }
+        img{margin-bottom: 0.4em; margin-top: 0.8em; display:block}
+        h2{font-size: 1.1em}
+        .deckhead{font-size: small; text-transform: uppercase}
+        .small{color: gray; font-size: small}
+        .date,.time,.copyright{font-size: x-small; color:gray; font-style:italic;}
+        """
 
     conversion_options = {
          'comment' : description
-       , 'language' : lang
-    }
+       , 'tags' : category
+       , 'publisher' : publisher
+       , 'language' : language
+       , 'linearize_tables' : 'Yes'
+    }
 
-    extra_css = '''
-    h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
-    h2{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-    .story{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-    .entry-body{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-    .entry-more{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
-    .credit{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-    .small{color:#666666; font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-    .byline{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
-    .date{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-    .time{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; font-style:italic;}
-    .copyright{font-family :Georgia,"Times New Roman",Times,serif; font-size: xx-small;color:#930000; }
-    .subhead{font-family :Georgia,"Times New Roman",Times,serif; font-size:x-small;}
-    '''
-
-#   recursions = 1
-#   match_regexps = [r'http://www.latimes.com/.*page=[2-9]']
-
-    keep_only_tags = [dict(name='div', attrs={'class':["story" ,"entry"] })]
-
-    remove_tags = [ dict(name='div', attrs={'class':['articlerail',"sphereTools","tools","toppaginate","entry-footer-left","entry-footer-right"]}),
-                    dict(name='div', attrs={'id':["moduleArticleToolsContainer",]}),
-                    dict(name='p', attrs={'class':["entry-footer",]}),
-                    dict(name='ul', attrs={'class':"article-nav clearfix"}),
-                    dict(name=['iframe'])
-                  ]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'story'})
+       ,dict(attrs={'class':['entry-header','time','entry-content']})
+    ]
+    remove_tags_after = dict(name='p', attrs={'class':'copyright'})
+    remove_tags = [
+        dict(name=['meta','link','iframe','object','embed'])
+       ,dict(attrs={'class':['toolSet','articlerail','googleAd','entry-footer-left','entry-footer-right','entry-footer-social','google-ad-story-bottom','sphereTools']})
+       ,dict(attrs={'id':['article-promo','googleads','moduleArticleToolsContainer','gallery-subcontent']})
+    ]
+    remove_attributes = ['lang','xmlns:fb','xmlns:og','border','xtags','i','article_body']
 
-    feeds = [(u'News', u'http://feeds.latimes.com/latimes/news')
-            ,(u'Local','http://feeds.latimes.com/latimes/news/local')
-            ,(u'MostEmailed','http://feeds.latimes.com/MostEmailed')
-            ,(u'Politics','http://feeds.latimes.com/latimes/news/local/politics/cal/')
-            ,('OrangeCounty','http://feeds.latimes.com/latimes/news/local/orange/')
-            ,('National','http://feeds.latimes.com/latimes/news/nationworld/nation')
-            ,('Politics','http://feeds.latimes.com/latimes/news/politics/')
-            ,('Business','http://feeds.latimes.com/latimes/business')
-            ,('Sports','http://feeds.latimes.com/latimes/sports/')
-            ,('Entertainment','http://feeds.latimes.com/latimes/entertainment/')
-            ]
+    feeds = [
+        (u'Top News'            , u'http://feeds.latimes.com/latimes/news')
+       ,(u'Local News'          , u'http://feeds.latimes.com/latimes/news/local')
+       ,(u'National'            , u'http://feeds.latimes.com/latimes/news/nationworld/nation')
+       ,(u'National Politics'   , u'http://feeds.latimes.com/latimes/news/politics/')
+       ,(u'Business'            , u'http://feeds.latimes.com/latimes/business')
+       ,(u'Education'           , u'http://feeds.latimes.com/latimes/news/education')
+       ,(u'Environment'         , u'http://feeds.latimes.com/latimes/news/science/environment')
+       ,(u'Religion'            , u'http://feeds.latimes.com/latimes/features/religion')
+       ,(u'Science'             , u'http://feeds.latimes.com/latimes/news/science')
+       ,(u'Technology'          , u'http://feeds.latimes.com/latimes/technology')
+       ,(u'Africa'              , u'http://feeds.latimes.com/latimes/africa')
+       ,(u'Asia'                , u'http://feeds.latimes.com/latimes/asia')
+       ,(u'Europe'              , u'http://feeds.latimes.com/latimes/europe')
+       ,(u'Latin America'       , u'http://feeds.latimes.com/latimes/latinamerica')
+       ,(u'Middle East'         , u'http://feeds.latimes.com/latimes/middleeast')
+       ,(u'Arts&Culture'        , u'http://feeds.feedburner.com/latimes/entertainment/news/arts')
+       ,(u'Entertainment News'  , u'http://feeds.feedburner.com/latimes/entertainment/news/')
+       ,(u'Movie News'          , u'http://feeds.feedburner.com/latimes/entertainment/news/movies/')
+       ,(u'Movie Reviews'       , u'http://feeds.feedburner.com/movies/reviews/')
+       ,(u'Music News'          , u'http://feeds.feedburner.com/latimes/entertainment/news/music/')
+       ,(u'Pop Album Reviews'   , u'http://feeds.feedburner.com/latimes/pop-album-reviews')
+       ,(u'Restaurant Reviews'  , u'http://feeds.feedburner.com/latimes/restaurant/reviews')
+       ,(u'Theater and Dance'   , u'http://feeds.feedburner.com/latimes/theaterdance')
+       ,(u'Autos'               , u'http://feeds.latimes.com/latimes/classified/automotive/highway1/')
+       ,(u'Books'               , u'http://feeds.latimes.com/features/books')
+       ,(u'Food'                , u'http://feeds.latimes.com/latimes/features/food/')
+       ,(u'Health'              , u'http://feeds.latimes.com/latimes/features/health/')
+       ,(u'Real Estate'         , u'http://feeds.latimes.com/latimes/classified/realestate/')
+       ,(u'Commentary'          , u'http://feeds2.feedburner.com/latimes/news/opinion/commentary/')
+       ,(u'Sports'              , u'http://feeds.latimes.com/latimes/sports/')
+    ]
 
     def get_article_url(self, article):
-        ans = article.get('feedburner_origlink').rpartition('?')[0]
+        ans = BasicNewsRecipe.get_article_url(self, article).rpartition('?')[0]
 
         try:
             self.log('Looking for full story link in', ans)
@@ -83,4 +102,22 @@ class LATimes(BasicNewsRecipe):
             pass
         return ans
 
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        return soup
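The get_article_url change is defensive: the old line called article.get('feedburner_origlink').rpartition(...), which raises AttributeError for any feed item lacking that key, whereas BasicNewsRecipe.get_article_url falls back through the available link fields before the query string is stripped. The same idea as a standalone sketch (the dict stands in for a parsed feed entry):

    def article_url(article):
        # prefer the feedburner original link, fall back to the plain link,
        # and strip any tracking query string in either case
        url = article.get('feedburner_origlink') or article.get('link', '')
        return url.rpartition('?')[0] or url

    print(article_url({'link': 'http://www.latimes.com/news/story.html?track=rss'}))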
@@ -139,6 +139,13 @@ class CHMReader(CHMFile):
         if self.hhc_path not in files and files:
             self.hhc_path = files[0]
 
+        if self.hhc_path == '.hhc' and self.hhc_path not in files:
+            from calibre import walk
+            for x in walk(output_dir):
+                if os.path.basename(x).lower() in ('index.htm', 'index.html'):
+                    self.hhc_path = os.path.relpath(x, output_dir)
+                    break
+
     def _reformat(self, data, htmlpath):
         try:
             data = xml_to_unicode(data, strip_encoding_pats=True)[0]
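The new block covers CHM files whose declared table-of-contents path ('.hhc') does not actually exist in the archive: it walks the extracted tree and falls back to the first index.htm/index.html it finds. The same fallback written as a standalone function, with os.walk standing in for calibre's walk helper:

    import os

    def find_toc_fallback(output_dir):
        # scan the extracted CHM tree for a usable entry page
        for root, dirs, files in os.walk(output_dir):
            for name in files:
                if name.lower() in ('index.htm', 'index.html'):
                    return os.path.relpath(os.path.join(root, name), output_dir)
        return None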
@@ -175,6 +175,19 @@ class EPUBInput(InputFormatPlugin):
             raise ValueError(
                 'EPUB files with DTBook markup are not supported')
 
+        for x in list(opf.iterspine()):
+            ref = x.get('idref', None)
+            if ref is None:
+                x.getparent().remove(x)
+                continue
+            for y in opf.itermanifest():
+                if y.get('id', None) == ref and y.get('media-type', None) in \
+                        ('application/vnd.adobe-page-template+xml',):
+                    p = x.getparent()
+                    if p is not None:
+                        p.remove(x)
+                    break
+
         with open('content.opf', 'wb') as nopf:
             nopf.write(opf.render())
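What the new loop accomplishes: some EPUBs list Adobe page-template streams in the manifest and reference them from the spine, and downstream processing chokes on the non-content item, so spine entries pointing at that media type (or at no idref at all) are dropped before content.opf is rewritten. A namespace-free sketch of the same pruning against a toy OPF (real OPF files are namespaced, so this is illustrative only):

    from lxml import etree

    opf = etree.fromstring(
        '<package><manifest>'
        '<item id="tpl" media-type="application/vnd.adobe-page-template+xml"/>'
        '<item id="ch1" media-type="application/xhtml+xml"/>'
        '</manifest><spine>'
        '<itemref idref="tpl"/><itemref idref="ch1"/>'
        '</spine></package>')
    # collect manifest ids with the offending media type ...
    bad = set(i.get('id') for i in opf.iter('item')
              if i.get('media-type') == 'application/vnd.adobe-page-template+xml')
    # ... and drop every spine reference to them
    for ref in list(opf.iter('itemref')):
        if ref.get('idref') in bad:
            ref.getparent().remove(ref)
    print(etree.tostring(opf))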
src/calibre/ebooks/metadata/sources/base.py — new file (61 lines)
@@ -0,0 +1,61 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re

from calibre.customize import Plugin

class Source(Plugin):

    type = _('Metadata source')
    author = 'Kovid Goyal'

    supported_platforms = ['windows', 'osx', 'linux']

    result_of_identify_is_complete = True

    def get_author_tokens(self, authors):
        'Take a list of authors and return a list of tokens useful for an '
        'AND search query'
        # Leave ' in there for Irish names
        pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
        for au in authors:
            for tok in au.split():
                yield pat.sub('', tok)

    def split_jobs(self, jobs, num):
        'Split a list of jobs into at most num groups, as evenly as possible'
        groups = [[] for i in range(num)]
        jobs = list(jobs)
        while jobs:
            for gr in groups:
                try:
                    job = jobs.pop()
                except IndexError:
                    break
                gr.append(job)
        return [g for g in groups if g]

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
        '''
        Identify a book by its title/author/isbn/etc.

        :param log: A log object, use it to output debugging information/errors
        :param result_queue: A result Queue, results should be put into it.
                             Each result is a Metadata object
        :param abort: If abort.is_set() returns True, abort further processing
                      and return as soon as possible
        :param title: The title of the book, can be None
        :param authors: A list of authors of the book, can be None
        :param identifiers: A dictionary of other identifiers, most commonly
                            {'isbn':'1234...'}
        :return: None if no errors occurred, otherwise a unicode representation
                 of the error suitable for showing to the user
        '''
        return None
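Both helpers are easy to exercise standalone. get_author_tokens strips query-hostile punctuation (deliberately keeping apostrophes for Irish names), and split_jobs deals jobs round-robin into at most num non-empty groups. A condensed re-statement of the same logic, with expected outputs shown as comments:

    import re

    def author_tokens(authors):
        pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
        for au in authors:
            for tok in au.split():
                yield pat.sub('', tok)

    def split_jobs(jobs, num):
        groups = [[] for _ in range(num)]
        jobs = list(jobs)
        while jobs:
            for gr in groups:
                try:
                    gr.append(jobs.pop())   # deal from the end, one per group
                except IndexError:
                    break
        return [g for g in groups if g]

    print(list(author_tokens(["O'Brien, Flann"])))   # ["O'Brien", 'Flann']
    print(split_jobs(range(5), 3))                   # [[4, 1], [3, 0], [2]]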
src/calibre/ebooks/metadata/sources/google.py — new file (215 lines)
@@ -0,0 +1,215 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import time
from urllib import urlencode
from functools import partial
from threading import Thread

from lxml import etree

from calibre.ebooks.metadata.sources import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, utcnow
from calibre import browser, as_unicode

NAMESPACES = {
    'openSearch': 'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom': 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)

total_results  = XPath('//openSearch:totalResults')
start_index    = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry          = XPath('//atom:entry')
entry_id       = XPath('descendant::atom:id')
creator        = XPath('descendant::dc:creator')
identifier     = XPath('descendant::dc:identifier')
title          = XPath('descendant::dc:title')
date           = XPath('descendant::dc:date')
publisher      = XPath('descendant::dc:publisher')
subject        = XPath('descendant::dc:subject')
description    = XPath('descendant::dc:description')
language       = XPath('descendant::dc:language')


def to_metadata(browser, log, entry_):

    def get_text(extra, x):
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    if not id_url or not title:
        # Silently discard this entry
        return None

    mi = Metadata(title_, authors)
    try:
        raw = browser.open(id_url).read()
        feed = etree.fromstring(raw)
        extra = entry(feed)[0]
    except:
        log.exception('Failed to get additional details for', mi.title)
        return mi

    mi.comments = get_text(extra, description)
    #mi.language = get_text(extra, language)
    mi.publisher = get_text(extra, publisher)

    # Author sort
    for x in creator(extra):
        for key, val in x.attrib.items():
            if key.endswith('file-as') and val and val.strip():
                mi.author_sort = val
                break
    # ISBN
    isbns = []
    for x in identifier(extra):
        t = str(x.text).strip()
        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
            if t[:5].upper() == 'ISBN:':
                isbns.append(t[5:])
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]

    # Tags
    try:
        btags = [x.text for x in subject(extra) if x.text]
        tags = []
        for t in btags:
            tags.extend([y.strip() for y in t.split('/')])
        tags = list(sorted(list(set(tags))))
    except:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        try:
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except:
            log.exception('Failed to parse pubdate')

    return mi


class Worker(Thread):

    def __init__(self, log, entries, abort, result_queue):
        self.browser, self.log, self.entries = browser(), log, entries
        self.abort, self.result_queue = abort, result_queue
        Thread.__init__(self)
        self.daemon = True

    def run(self):
        for i in self.entries:
            try:
                ans = to_metadata(self.browser, self.log, i)
                if ans is not None:
                    self.result_queue.put(ans)
            except:
                self.log.exception(
                    'Failed to get metadata for identify entry:',
                    etree.tostring(i))
            if self.abort.is_set():
                break


class GoogleBooks(Source):

    name = 'Google Books'

    def create_query(self, log, title=None, authors=None, identifiers={},
            start_index=1):
        BASE_URL = 'http://books.google.com/books/feeds/volumes?'
        isbn = identifiers.get('isbn', None)
        q = ''
        if isbn is not None:
            q += 'isbn:'+isbn
        elif title or authors:
            def build_term(prefix, parts):
                return ' '.join('in'+prefix + ':' + x for x in parts)
            if title is not None:
                q += build_term('title', title.split())
            if authors:
                q += ('+' if q else '')+build_term('author',
                        self.get_author_tokens(authors))

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None
        return BASE_URL+urlencode({
            'q': q,
            'max-results': 20,
            'start-index': start_index,
            'min-viewability': 'none',
        })

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        try:
            raw = browser().open_novisit(query).read()
        except Exception, e:
            log.exception('Failed to make identify query: %r'%query)
            return as_unicode(e)

        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(raw, parser=parser)
            entries = entry(feed)
        except Exception, e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)

        groups = self.split_jobs(entries, 5) # At most 5 threads
        if not groups:
            return
        workers = [Worker(log, entries, abort, result_queue) for entries in
                groups]

        if abort.is_set():
            return

        for worker in workers: worker.start()

        has_alive_worker = True
        while has_alive_worker and not abort.is_set():
            has_alive_worker = False
            for worker in workers:
                if worker.is_alive():
                    has_alive_worker = True
            time.sleep(0.1)

        return None
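The identify() entry point is meant to be driven with a result queue and an abort event. A hedged sketch of a caller, in the Python 2 style of the code above (GoogleBooks(None) assumes the Plugin base tolerates a None argument, default_log is calibre's stock logger, and the 2011-era Google Books feed this hits is long gone, so treat it as illustration only):

    from Queue import Queue
    from threading import Event
    from calibre.utils.logging import default_log

    results, abort = Queue(), Event()
    source = GoogleBooks(None)   # assumption: the Plugin base accepts None here
    err = source.identify(default_log, results, abort,
            title='Great Expectations', authors=['Charles Dickens'])
    while not results.empty():
        mi = results.get()
        print mi.title, mi.authors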
@@ -83,6 +83,7 @@ class RTFInput(InputFormatPlugin):
                 os.mkdir(debug_dir)
                 debug_dir = 'rtfdebug'
                 run_lev = 4
+                self.log('Running RTFParser in debug mode')
             except:
                 pass
         parser = ParseRtf(
@@ -230,22 +231,6 @@ class RTFInput(InputFormatPlugin):
         with open('styles.css', 'ab') as f:
             f.write(css)
 
-    # def preprocess(self, fname):
-    #     self.log('\tPreprocessing to convert unicode characters')
-    #     try:
-    #         data = open(fname, 'rb').read()
-    #         from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
-    #         tokenizer = RtfTokenizer(data)
-    #         tokens = RtfTokenParser(tokenizer.tokens)
-    #         data = tokens.toRTF()
-    #         fname = 'preprocessed.rtf'
-    #         with open(fname, 'wb') as f:
-    #             f.write(data)
-    #     except:
-    #         self.log.exception(
-    #             'Failed to preprocess RTF to convert unicode sequences, ignoring...')
-    #     return fname
-
     def convert_borders(self, doc):
         border_styles = []
         style_map = {}
@@ -280,8 +265,6 @@ class RTFInput(InputFormatPlugin):
         self.opts = options
         self.log = log
         self.log('Converting RTF to XML...')
-        #Name of the preprocesssed RTF file
-        # fname = self.preprocess(stream.name)
         try:
             xml = self.generate_xml(stream.name)
         except RtfInvalidCodeException, e:
@@ -335,3 +318,4 @@ class RTFInput(InputFormatPlugin):
         opf.render(open('metadata.opf', 'wb'))
         return os.path.abspath('metadata.opf')
 
+
@@ -238,6 +238,8 @@ class ParseRtf:
                 bug_handler = RtfInvalidCodeException,
             )
             enc = 'cp' + encode_obj.get_codepage()
+            if enc == 'cp10000':
+                enc = 'mac_roman'
             msg = 'Exception in token processing'
             if check_encoding_obj.check_encoding(self.__file, enc):
                 file_name = self.__file if isinstance(self.__file, str) \
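The two new lines exist because 'cp10000' (the classic Mac OS codepage some RTF files declare) is not a codec name Python recognises, while 'mac_roman' is. A quick check:

    import codecs

    codecs.lookup('mac_roman')       # resolves fine
    try:
        codecs.lookup('cp10000')     # not in Python's codec registry
    except LookupError:
        print 'cp10000 must be mapped to mac_roman first'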
@@ -15,8 +15,10 @@
 #                                                                       #
 #                                                                       #
 #########################################################################
 import sys, os, tempfile, re
+
 from calibre.ebooks.rtf2xml import copy
+
 class Colors:
     """
     Change lines with color info from color numbers to the actual color names.
@@ -40,8 +42,10 @@ class Colors:
         self.__file = in_file
         self.__copy = copy
         self.__bug_handler = bug_handler
+        self.__line = 0
         self.__write_to = tempfile.mktemp()
         self.__run_level = run_level
+
     def __initiate_values(self):
         """
         Initiate all values.
@@ -61,6 +65,7 @@ class Colors:
         self.__color_num = 1
         self.__line_color_exp = re.compile(r'bdr-color_:(\d+)')
         # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
+
     def __before_color_func(self, line):
         """
         Requires:
@@ -76,6 +81,7 @@ class Colors:
         if self.__token_info == 'mi<mk<clrtbl-beg':
             self.__state = 'in_color_table'
         self.__write_obj.write(line)
+
     def __default_color_func(self, line):
         """
         Requires:
@@ -87,6 +93,7 @@ class Colors:
         """
         hex_num = line[-3:-1]
         self.__color_string += hex_num
+
     def __blue_func(self, line):
         """
         Requires:
@@ -109,6 +116,7 @@ class Colors:
             )
         self.__color_num += 1
         self.__color_string = '#'
+
     def __in_color_func(self, line):
         """
         Requires:
@@ -127,12 +135,13 @@ class Colors:
             self.__state = 'after_color_table'
         else:
             action = self.__state_dict.get(self.__token_info)
-            if action == None:
+            if action is None:
                 sys.stderr.write('in module colors.py\n'
                     'function is self.__in_color_func\n'
                     'no action for %s' % self.__token_info
                 )
             action(line)
+
     def __after_color_func(self, line):
         """
         Check the line to see if it contains color info. If it does, extract the
@@ -180,6 +189,7 @@ class Colors:
         else:
             self.__write_obj.write(line)
         # cw<bd<bor-par-to<nu<bdr-hair__|bdr-li-wid:0.50|bdr-sp-wid:1.00|bdr-color_:2
+
     def __sub_from_line_color(self, match_obj):
         num = match_obj.group(1)
         try:
@@ -191,25 +201,27 @@ class Colors:
         else:
             return 'bdr-color_:no-value'
         hex_num = self.__figure_num(num)
-        return_value = 'bdr-color_:%s' % hex_num
-        return return_value
+        return 'bdr-color_:%s' % hex_num
+
     def __figure_num(self, num):
         if num == 0:
             hex_num = 'false'
         else:
             hex_num = self.__color_dict.get(num)
-        if hex_num == None:
-            if self.__run_level > 3:
-                msg = 'no value in self.__color_dict for key %s\n' % num
-                raise self.__bug_hanlder, msg
-            if hex_num == None:
-                hex_num = '0'
+        if hex_num is None:
+            hex_num = '0'
+            if self.__run_level > 5:
+                msg = 'no value in self.__color_dict' \
+                    'for key %s at line %d\n' % (num, self.__line)
+                raise self.__bug_handler, msg
         return hex_num
+
     def __do_nothing_func(self, line):
         """
         Bad RTF will have text in the color table
         """
         pass
+
     def convert_colors(self):
         """
         Requires:
@@ -226,20 +238,16 @@ class Colors:
         info, and substitute the number with the hex number.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module fonts.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__line+=1
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('no matching state in module fonts.py\n')
+                        sys.stderr.write(self.__state + '\n')
+                    action(line)
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "color.data")
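The convert_colors rewrite replaces the manual readline() loop with direct file iteration and threads a line counter through, so the run-level error can report where the bad color index occurred. The underlying pattern is a tiny table-driven state machine; its skeleton, separated from the RTF details (here actions signal a state switch via their return value, where the class above flips an instance attribute instead):

    import sys

    def run_state_machine(lines, state_dict, state):
        # each action handles one line for the current state; an action may
        # return a new state name as table markers go by
        for lineno, line in enumerate(lines, 1):
            action = state_dict.get(state)
            if action is None:
                sys.stderr.write('no matching state %r at line %d\n' % (state, lineno))
                break
            state = action(line) or state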
@@ -33,13 +33,13 @@ class ConvertToTags:
         self.__copy = copy
         self.__dtd_path = dtd_path
         self.__no_dtd = no_dtd
-        if encoding != 'mac_roman':
-            self.__encoding = 'cp' + encoding
-        else:
+        self.__encoding = 'cp' + encoding
+        if encoding == 'mac_roman':
             self.__encoding = 'mac_roman'
         self.__indent = indent
         self.__run_level = run_level
         self.__write_to = tempfile.mktemp()
+        self.__convert_utf = False
 
     def __initiate_values(self):
         """
@@ -213,7 +213,8 @@ class ConvertToTags:
         if not check_encoding_obj.check_encoding(self.__file, verbose=False):
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
         elif not check_encoding_obj.check_encoding(self.__file, self.__encoding):
-            self.__write_obj.write('<?xml version="1.0" encoding="%s" ?>' % self.__encoding)
+            self.__write_obj.write('<?xml version="1.0" encoding="UTF-8" ?>')
+            self.__convert_utf = True
         else:
             self.__write_obj.write('<?xml version="1.0" encoding="US-ASCII" ?>')
             sys.stderr.write('Bad RTF encoding, revert to US-ASCII chars and'
@@ -253,15 +254,28 @@ class ConvertToTags:
         an empty tag function.
         """
         self.__initiate_values()
-        self.__write_obj = open(self.__write_to, 'w')
+        with open(self.__write_to, 'w') as self.__write_obj:
             self.__write_dec()
             with open(self.__file, 'r') as read_obj:
                 for line in read_obj:
                     self.__token_info = line[:16]
                     action = self.__state_dict.get(self.__token_info)
                     if action is not None:
                         action(line)
         self.__write_obj.close()
+        #convert all encodings to UTF8 to avoid unsupported encodings in lxml
+        if self.__convert_utf:
+            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
+            copy_obj.rename(self.__write_to, self.__file)
+            with open(self.__file, 'r') as read_obj:
+                with open(self.__write_to, 'w') as write_obj:
+                    file = read_obj.read()
+                    try:
+                        file = file.decode(self.__encoding)
+                        write_obj.write(file.encode('utf-8'))
+                    except:
+                        sys.stderr.write('Conversion to UTF-8 is not possible,'
+                            ' encoding should be very carefully checked')
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
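The added branch works around lxml's refusal to honour arbitrary cpXXXX encodings in the XML declaration: the intermediate file is declared as UTF-8 and then actually transcoded to it. The transcoding core, as a standalone sketch:

    def transcode_to_utf8(path, src_encoding):
        # read the legacy-codepage bytes, decode, write back as UTF-8
        with open(path, 'rb') as f:
            data = f.read()
        with open(path, 'wb') as f:
            f.write(data.decode(src_encoding).encode('utf-8'))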
@@ -75,12 +75,16 @@ class DefaultEncoding:
             self._encoding()
             self.__datafetched = True
         code_page = 'ansicpg' + self.__code_page
+        if self.__code_page == '10000':
+            self.__code_page = 'mac_roman'
         return self.__platform, code_page, self.__default_num
 
     def get_codepage(self):
         if not self.__datafetched:
             self._encoding()
             self.__datafetched = True
+        if self.__code_page == '10000':
+            self.__code_page = 'mac_roman'
         return self.__code_page
 
     def get_platform(self):
@@ -16,7 +16,9 @@
 #                                                                       #
 #########################################################################
 import sys, os, tempfile
+
 from calibre.ebooks.rtf2xml import copy
+
 class Fonts:
     """
     Change lines with font info from font numbers to the actual font names.
@@ -45,6 +47,7 @@ class Fonts:
         self.__default_font_num = default_font_num
         self.__write_to = tempfile.mktemp()
         self.__run_level = run_level
+
     def __initiate_values(self):
         """
         Initiate all values.
@@ -67,6 +70,7 @@ class Fonts:
         self.__font_table = {}
         # individual font written
         self.__wrote_ind_font = 0
+
     def __default_func(self, line):
         """
         Requires:
@@ -79,6 +83,7 @@ class Fonts:
         if self.__token_info == 'mi<mk<fonttb-beg':
             self.__state = 'font_table'
         self.__write_obj.write(line)
+
     def __font_table_func(self, line):
         """
         Requires:
@@ -101,6 +106,7 @@ class Fonts:
         self.__font_num = self.__default_font_num
         self.__text_line = ''
         ##self.__write_obj.write(line)
+
     def __font_in_table_func(self, line):
         """
         Requires:
@@ -138,6 +144,7 @@ class Fonts:
         elif self.__token_info == 'mi<mk<fonttb-end':
             self.__found_end_font_table_func()
             self.__state = 'after_font_table'
+
     def __found_end_font_table_func(self):
         """
         Required:
@@ -150,7 +157,8 @@ class Fonts:
         if not self.__wrote_ind_font:
             self.__write_obj.write(
                 'mi<tg<empty-att_'
-                '<font-in-table<name>Times<num>0\n' )
+                '<font-in-table<name>Times<num>0\n')
+
     def __after_font_table_func(self, line):
         """
         Required:
@@ -169,7 +177,7 @@ class Fonts:
         if self.__token_info == 'cw<ci<font-style':
             font_num = line[20:-1]
             font_name = self.__font_table.get(font_num)
-            if font_name == None:
+            if font_name is None:
                 if self.__run_level > 3:
                     msg = 'no value for %s in self.__font_table\n' % font_num
                     raise self.__bug_handler, msg
@@ -182,6 +190,7 @@ class Fonts:
             )
         else:
             self.__write_obj.write(line)
+
     def convert_fonts(self):
         """
         Required:
@@ -197,20 +206,15 @@ class Fonts:
         info. Substitute a font name for a font number.
         """
         self.__initiate_values()
-        read_obj = open(self.__file, 'r')
-        self.__write_obj = open(self.__write_to, 'w')
-        line_to_read = 1
-        while line_to_read:
-            line_to_read = read_obj.readline()
-            line = line_to_read
-            self.__token_info = line[:16]
-            action = self.__state_dict.get(self.__state)
-            if action == None:
-                sys.stderr.write('no no matching state in module fonts.py\n')
-                sys.stderr.write(self.__state + '\n')
-            action(line)
-        read_obj.close()
-        self.__write_obj.close()
+        with open(self.__file, 'r') as read_obj:
+            with open(self.__write_to, 'w') as self.__write_obj:
+                for line in read_obj:
+                    self.__token_info = line[:16]
+                    action = self.__state_dict.get(self.__state)
+                    if action is None:
+                        sys.stderr.write('no matching state in module fonts.py\n' \
+                            + self.__state + '\n')
+                    action(line)
         default_font_name = self.__font_table.get(self.__default_font_num)
         if not default_font_name:
             default_font_name = 'Not Defined'
@@ -43,7 +43,7 @@ class GetCharMap:
     def get_char_map(self, map):
         if map == 'ansicpg0':
             map = 'ansicpg1250'
-        if map in ('ansicpg10000', '10000'):
+        if map == 'ansicpg10000':
             map = 'mac_roman'
         found_map = False
         map_dict = {}
@@ -126,12 +126,6 @@ class Tokenize:
         tokens = re.split(self.__splitexp, input_file)
         #remove empty tokens and \n
         return filter(lambda x: len(x) > 0 and x != '\n', tokens)
-        #input_file = re.sub(self.__utf_exp, self.__from_ms_to_utf8, input_file)
-        # line = re.sub( self.__neg_utf_exp, self.__neg_unicode_func, line)
-        # this is for older RTF
-        #line = re.sub(self.__par_exp, '\\par ', line)
-        #return filter(lambda x: len(x) > 0, \
-            #(self.__remove_line.sub('', x) for x in tokens))
 
     def __compile_expressions(self):
         SIMPLE_RPL = {
@@ -160,7 +154,7 @@ class Tokenize:
         }
         self.__replace_spchar = MReplace(SIMPLE_RPL)
         #add ;? in case of char following \u
-        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})") #r"\\\'(..)"
+        self.__ms_hex_exp = re.compile(r"\\\'([0-9a-fA-F]{2})")
         self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) ?")
         self.__bin_exp = re.compile(r"(?:\\bin(-?\d{0,10})[\n ]+)[01\n]+")
         #manage upr/ud situations
@@ -172,14 +166,21 @@ class Tokenize:
         self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
         #this is for old RTF
         self.__par_exp = re.compile(r'\\\n+')
-        # self.__par_exp = re.compile(r'\\$')
+        #handle cw using a digit as argument and without space as delimiter
+        self.__cwdigit_exp = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
         #self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
         #self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
         #self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
         #self.__remove_line = re.compile(r'\n+')
-        #self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
         ##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
 
+    def __correct_spliting(self, token):
+        match_obj = re.search(self.__cwdigit_exp, token)
+        if match_obj is None:
+            return token
+        else:
+            return '%s\n%s' % (match_obj.group(1), match_obj.group(2))
+
     def tokenize(self):
         """Main class for handling other methods. Reads the file \
         , uses method self.sub_reg to make basic substitutions,\
@@ -195,6 +196,8 @@ class Tokenize:
         tokens = map(self.__unicode_process, tokens)
         #remove empty items created by removing \uc
         tokens = filter(lambda x: len(x) > 0, tokens)
+        #handles bothersome cases
+        tokens = map(self.__correct_spliting, tokens)
 
         #write
         with open(self.__write_to, 'wb') as write_obj:
@@ -203,8 +206,6 @@ class Tokenize:
         copy_obj = copy.Copy(bug_handler = self.__bug_handler)
         if self.__copy:
             copy_obj.copy_file(self.__write_to, "tokenize.data")
-        # if self.__out_file:
-        #     self.__file = self.__out_file
         copy_obj.rename(self.__write_to, self.__file)
         os.remove(self.__write_to)
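The new __correct_spliting pass (the spelling is the committed identifier) fixes control words whose numeric argument runs straight into following text with no space delimiter, e.g. '\fs24Hello'. What the regex captures, in isolation:

    import re

    cwdigit = re.compile(r"(\\[a-zA-Z]+[\-0-9]+)([^0-9 \\]+)")
    m = cwdigit.search(r'\fs24Hello')
    if m is not None:
        # the control word and the glued-on text, split onto separate tokens
        print '%s\n%s' % (m.group(1), m.group(2))   # '\fs24' then 'Hello'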
@@ -429,10 +429,12 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 old_extensions.add(ext)
         for ext in new_extensions:
             self.db.add_format(self.row, ext, open(paths[ext], 'rb'), notify=False)
-        db_extensions = set([f.lower() for f in self.db.formats(self.row).split(',')])
+        dbfmts = self.db.formats(self.row)
+        db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
+            else [])])
         extensions = new_extensions.union(old_extensions)
         for ext in db_extensions:
-            if ext not in extensions:
+            if ext not in extensions and ext in self.original_formats:
                 self.db.remove_format(self.row, ext, notify=False)
 
     def show_format(self, item, *args):
@@ -576,6 +578,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
         self.orig_date = qt_to_dt(self.date.date())
 
         exts = self.db.formats(row)
+        self.original_formats = []
         if exts:
             exts = exts.split(',')
             for ext in exts:
@@ -586,6 +589,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                 if size is None:
                     continue
                 Format(self.formats, ext, size, timestamp=timestamp)
+                self.original_formats.append(ext.lower())
 
 
         self.initialize_combos()
@@ -472,6 +472,7 @@ class FormatsManager(QWidget): # {{{
     def initialize(self, db, id_):
         self.changed = False
         exts = db.formats(id_, index_is_id=True)
+        self.original_val = set([])
         if exts:
             exts = exts.split(',')
             for ext in exts:
@@ -482,6 +483,7 @@ class FormatsManager(QWidget): # {{{
             if size is None:
                 continue
             Format(self.formats, ext, size, timestamp=timestamp)
+            self.original_val.add(ext.lower())

     def commit(self, db, id_):
         if not self.changed:
@@ -500,11 +502,12 @@ class FormatsManager(QWidget): # {{{
         for ext in new_extensions:
             db.add_format(id_, ext, open(paths[ext], 'rb'), notify=False,
                     index_is_id=True)
-        db_extensions = set([f.lower() for f in db.formats(id_,
-            index_is_id=True).split(',')])
+        dbfmts = db.formats(id_, index_is_id=True)
+        db_extensions = set([f.lower() for f in (dbfmts.split(',') if dbfmts
+            else [])])
         extensions = new_extensions.union(old_extensions)
         for ext in db_extensions:
-            if ext not in extensions:
+            if ext not in extensions and ext in self.original_val:
                 db.remove_format(id_, ext, notify=False, index_is_id=True)

         self.changed = False
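This mirrors the dialog change above: both format editors get the same None guard and the same removal rule. The rule itself, isolated (function and argument names here are hypothetical):

    def removable(ext, kept_exts, original_exts):
        # Remove a format only if the user dropped it AND it was present
        # when editing began; concurrently added formats are preserved.
        return ext not in kept_exts and ext in original_exts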
@@ -232,6 +232,7 @@ class BIBTEX(CatalogPlugin): # {{{
                     help = _('The fields to output when cataloging books in the '
                         'database. Should be a comma-separated list of fields.\n'
                         'Available fields: %s.\n'
+                        'plus user-created custom fields.\n'
                         'Example: %s=title,authors,tags\n'
                         "Default: '%%default'\n"
                         "Applies to: BIBTEX output format")%(', '.join(FIELDS),
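A note on the escaping in these help strings: the template is %-formatted once, immediately, with the field lists, so `%%default` survives that pass as `%default` for optparse to fill in later. For instance:

    tmpl = ("Available fields: %s.\n"
            "Default: '%%default'\n")
    print(tmpl % 'title, authors, tags')
    # Available fields: title, authors, tags.
    # Default: '%default'   <- optparse later swaps in the real default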
@@ -269,7 +270,7 @@ class BIBTEX(CatalogPlugin): # {{{
                     dest = 'bib_cit',
                     action = None,
                     help = _('The template for citation creation from database fields.\n'
-                        ' Should be a template with {} enclosed fields.\n'
+                        'Should be a template with {} enclosed fields.\n'
                         'Available fields: %s.\n'
                         "Default: '%%default'\n"
                         "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
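The citation template takes `{}`-enclosed field names; a rough illustration of the expansion, assuming str.format-style substitution (the template, values, and the --bib-cit flag name inferred from dest='bib_cit' are all assumptions):

    template = '{authors}{id}'  # e.g. a --bib-cit value
    entry = {'authors': 'Austen', 'id': '42'}
    print(template.format(**entry))  # 'Austen42', usable as the BibTeX key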
@@ -344,7 +345,7 @@ class BIBTEX(CatalogPlugin): # {{{
             if field == 'authors' :
                 bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))

-            elif field in ['title', 'publisher', 'cover', 'uuid',
+            elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                 'author_sort', 'series'] :
                 bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

@@ -378,7 +379,7 @@ class BIBTEX(CatalogPlugin): # {{{
             if calibre_files:
                 files = [u':%s:%s' % (format, format.rpartition('.')[2].upper())\
                     for format in item]
-                bibtex_entry.append(u'files = "%s"' % u', '.join(files))
+                bibtex_entry.append(u'file = "%s"' % u', '.join(files))

             elif field == 'series_index' :
                 bibtex_entry.append(u'volume = "%s"' % int(item))
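Renaming `files` to `file` matters because `file` is the field name reference managers such as JabRef read for linked documents, in the `:path:TYPE` convention the list comprehension already builds. Running that comprehension over invented paths shows the emitted line:

    item = ['/lib/Austen/Emma.epub', '/lib/Austen/Emma.pdf']  # made-up paths
    files = [u':%s:%s' % (fmt, fmt.rpartition('.')[2].upper()) for fmt in item]
    print(u'file = "%s"' % u', '.join(files))
    # file = ":/lib/Austen/Emma.epub:EPUB, :/lib/Austen/Emma.pdf:PDF"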
@@ -474,6 +475,8 @@ class BIBTEX(CatalogPlugin): # {{{
         if opts.verbose:
             opts_dict = vars(opts)
             log("%s(): Generating %s" % (self.name,self.fmt))
+            if opts.connected_device['is_device_connected']:
+                log(" connected_device: %s" % opts.connected_device['name'])
             if opts_dict['search_text']:
                 log(" --search='%s'" % opts_dict['search_text'])

@@ -548,6 +551,7 @@ class BIBTEX(CatalogPlugin): # {{{
                 as outfile:
             #File header
             nb_entries = len(data)

             #check in book strict if all is ok else throw a warning into log
             if bib_entry == 'book' :
                 nb_books = len(filter(check_entry_book_valid, data))
@@ -555,6 +559,11 @@ class BIBTEX(CatalogPlugin): # {{{
                     log(" WARNING: only %d entries in %d are book compatible" % (nb_books, nb_entries))
                     nb_entries = nb_books

+            # If connected device, add 'On Device' values to data
+            if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
+                for entry in data:
+                    entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
+
             outfile.write(u'%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
             outfile.write(u'@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
                 % (nb_entries, nowf().strftime("%A, %d. %B %Y %H:%M").decode(preferred_encoding)))
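Together with the `'ondevice'` addition to the recognized field list earlier, this makes the BibTeX catalog device-aware: when a reader is connected, every record is annotated with its on-device location before the entries are written. A stripped-down model of that flow (the data shapes mirror the `catalog_plugin_on_device_temp_mapping` lookup above; all values are invented):

    data = [{'id': 1, 'title': 'Emma'}, {'id': 2, 'title': 'Persuasion'}]
    on_device_map = {1: {'ondevice': 'Main'}, 2: {'ondevice': ''}}
    device_connected, fields = True, ['title', 'ondevice']

    if device_connected and 'ondevice' in fields:
        for entry in data:
            entry['ondevice'] = on_device_map[entry['id']]['ondevice']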
@@ -391,6 +391,8 @@ Take your pick:
     * A tribute to the SONY Librie which was the first e-ink based e-book reader
     * My wife chose it ;-)

+|app| is pronounced as cal-i-ber *not* ca-libre. If you're wondering, |app| is the British/commonwealth spelling for caliber. Being Indian, that's the natural spelling for me.
+
 Why does |app| show only some of my fonts on OS X?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 |app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory.