KG updates

2025-07-09 03:04:10 -04:00 · 2011-06-24 06:27:06 -06:00 · 2011-06-24 06:27:06 -06:00 · 64ae406db9
commit 64ae406db9
parent b0766bd2cb aae6dfdab5
10 changed files with 562 additions and 379 deletions
--- a/recipes/le_temps.recipe
+++ b/recipes/le_temps.recipe
@ -14,7 +14,7 @@ class LeTemps(BasicNewsRecipe):
     title          = u'Le Temps'
     oldest_article = 7
     max_articles_per_feed = 100
-     __author__ = 'Sujata Raman'
+     __author__ = 'Kovid Goyal'
     description = 'French news. Needs a subscription from http://www.letemps.ch'
     no_stylesheets = True
     remove_javascript = True
@ -27,6 +27,7 @@ class LeTemps(BasicNewsRecipe):
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         br.open('http://www.letemps.ch/login')
+         br.select_form(nr=1)
         br['username'] = self.username
         br['password'] = self.password
         raw = br.submit().read()
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -875,7 +875,7 @@ class ActionCopyToLibrary(InterfaceActionBase):
 class ActionTweakEpub(InterfaceActionBase):
    name = 'Tweak ePub'
    actual_plugin = 'calibre.gui2.actions.tweak_epub:TweakEpubAction'
-    description = _('Make small twekas to epub files in your calibre library')
+    description = _('Make small tweaks to epub files in your calibre library')

 class ActionNextMatch(InterfaceActionBase):
    name = 'Next Match'
--- a/src/calibre/ebooks/metadata/fb2.py
+++ b/src/calibre/ebooks/metadata/fb2.py
@ -1,96 +1,235 @@
 #!/usr/bin/env python
 from __future__ import with_statement
 __license__   = 'GPL v3'
-__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
-
+__copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>, '\
+                '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
 '''Read meta information from fb2 files'''

 import os
+import datetime
+from functools import partial
 from base64 import b64decode
 from lxml import etree
-from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.date import parse_date
+from calibre import guess_all_extensions, prints, force_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn
 from calibre.ebooks.chardet import xml_to_unicode
-from calibre import guess_all_extensions

-XLINK_NS     = 'http://www.w3.org/1999/xlink'
-def XLINK(name):
-    return '{%s}%s' % (XLINK_NS, name)

+NAMESPACES = {
+    'fb2'   :   'http://www.gribuser.ru/xml/fictionbook/2.0',
+    'xlink' :   'http://www.w3.org/1999/xlink'  }
+
+XPath = partial(etree.XPath, namespaces=NAMESPACES)
+tostring = partial(etree.tostring, method='text', encoding=unicode)

 def get_metadata(stream):
-    """ Return metadata as a L{MetaInfo} object """
-    XPath = lambda x : etree.XPath(x,
-            namespaces={'fb2':'http://www.gribuser.ru/xml/fictionbook/2.0',
-                'xlink':XLINK_NS})
-    tostring = lambda x : etree.tostring(x, method='text',
-            encoding=unicode).strip()
+    """ Return fb2 metadata as a L{MetaInformation} object """
+
+    root = _get_fbroot(stream)
+
+    book_title = _parse_book_title(root)
+    authors = _parse_authors(root)
+
+    # fallback for book_title
+    if book_title:
+        book_title = unicode(book_title)
+    else:
+        book_title = force_unicode(os.path.splitext(
+            os.path.basename(getattr(stream, 'name',
+                _('Unknown'))))[0])
+    mi = MetaInformation(book_title, authors)
+
+    try:
+        _parse_cover(root, mi)
+    except:
+        pass
+    try:
+        _parse_comments(root, mi)
+    except:
+        pass
+    try:
+        _parse_tags(root, mi)
+    except:
+        pass
+    try:
+        _parse_series(root, mi)
+    except:
+        pass
+    try:
+        _parse_isbn(root, mi)
+    except:
+        pass
+    try:
+        _parse_publisher(root, mi)
+    except:
+        pass
+    try:
+        _parse_pubdate(root, mi)
+    except:
+        pass
+    try:
+        _parse_timestamp(root, mi)
+    except:
+        pass
+
+    try:
+        _parse_language(root, mi)
+    except:
+        pass
+    #_parse_uuid(root, mi)
+
+    #if DEBUG:
+    #   prints(mi)
+    return mi
+
+def _parse_authors(root):
+    """ Return the author names of the first section that has any (never empty) """
+    authors = []
+    # pick up authors but only from 1 section <title-info>; otherwise it is not consistent!
+    # Those are fallbacks: <src-title-info>, <document-info>
+    for author_sec in ['title-info', 'src-title-info', 'document-info']:
+        for au in XPath('//fb2:%s/fb2:author'%author_sec)(root):
+            author = _parse_author(au)
+            if author:
+                authors.append(author)
+        # test the list, not the loop variable: `author` is unbound when the
+        # section has no <author> and only reflects the last one otherwise
+        if authors:
+            break
+
+    # if no author so far
+    if not authors:
+        authors.append(_('Unknown'))
+
+    return authors
+
+def _parse_author(elm_author):
+    """ Return the display name of one <author> element as a single string
+
+    The name is first-name, middle-name and last-name joined by spaces; when
+    all three are empty the nickname is used instead.
+    """
+
+    xp_templ = 'normalize-space(fb2:%s/text())'
+
+    author = XPath(xp_templ % 'first-name')(elm_author)
+    lname = XPath(xp_templ % 'last-name')(elm_author)
+    mname = XPath(xp_templ % 'middle-name')(elm_author)
+
+    if mname:
+        author = (author + ' ' + mname).strip()
+    if lname:
+        author = (author + ' ' + lname).strip()
+
+    # fallback to nickname
+    if not author:
+        nname = XPath(xp_templ % 'nickname')(elm_author)
+        if nname:
+            author = nname
+
+    return author
+
+
+def _parse_book_title(root):
+    # <title-info> has a priority.   (actually <title-info>  is mandatory)
+    # others are backup solutions (sequence is important. other than in fb2-doc)
+    xp_ti = '//fb2:title-info/fb2:book-title/text()'
+    xp_pi = '//fb2:publish-info/fb2:book-title/text()'
+    xp_si = '//fb2:src-title-info/fb2:book-title/text()'
+    book_title = XPath('normalize-space(%s|%s|%s)' % (xp_ti, xp_pi, xp_si))(root)
+
+    return book_title
+
+def _parse_cover(root, mi):
+    # pickup from <title-info>, if not exists it fallbacks to <src-title-info>
+    imgid = XPath('substring-after(string(//fb2:coverpage/fb2:image/@xlink:href), "#")')(root)
+    if imgid:
+        try:
+            _parse_cover_data(root, imgid, mi)
+        except:
+            pass
+
+def _parse_cover_data(root, imgid, mi):
+    elm_binary = XPath('//fb2:binary[@id="%s"]'%imgid)(root)
+    if elm_binary:
+        mimetype = elm_binary[0].get('content-type', 'image/jpeg')
+        mime_extensions = guess_all_extensions(mimetype)
+        if mime_extensions:
+            pic_data = elm_binary[0].text
+            if pic_data:
+                mi.cover_data = (mime_extensions[0][1:], b64decode(pic_data))
+        else:
+            prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid) )
+
+def _parse_tags(root, mi):
+    # pick up genre but only from 1 section <title-info>; otherwise it is not consistent!
+    # Those are fallbacks: <src-title-info>
+    for genre_sec in ['title-info', 'src-title-info']:
+        # -- i18n Translations-- ?
+        tags = XPath('//fb2:%s/fb2:genre/text()' % genre_sec)(root)
+        if tags:
+            mi.tags = list(map(unicode, tags))
+            break
+
+def _parse_series(root, mi):
+    #calibre supports only 1 series: use the 1-st one
+    # pick up sequence but only from 1 section in preferred order
+    # except <src-title-info>
+    xp_ti = '//fb2:title-info/fb2:sequence[1]'
+    xp_pi = '//fb2:publish-info/fb2:sequence[1]'
+
+    elms_sequence = XPath('%s|%s' % (xp_ti, xp_pi))(root)
+    if elms_sequence:
+        mi.series = elms_sequence[0].get('name', None)
+        if mi.series:
+            mi.series_index = elms_sequence[0].get('number', None)
+
+def _parse_isbn(root, mi):
+    # some people try to put several isbn in this field, but it is not allowed.  try to stick to the 1-st one in this case
+    isbn = XPath('normalize-space(//fb2:publish-info/fb2:isbn/text())')(root)
+    # so keep only the part before the first comma
+    if ',' in isbn:
+        isbn = isbn[:isbn.index(',')]
+    if check_isbn(isbn):
+        mi.isbn = isbn
+
+def _parse_comments(root, mi):
+    # pick up annotation but only from 1 section <title-info>;  fallback: <src-title-info>
+    for annotation_sec in ['title-info', 'src-title-info']:
+        elms_annotation = XPath('//fb2:%s/fb2:annotation' % annotation_sec)(root)
+        if elms_annotation:
+            mi.comments = tostring(elms_annotation[0])
+            # TODO: tags i18n, xslt?
+            break
+
+def _parse_publisher(root, mi):
+    publisher = XPath('string(//fb2:publish-info/fb2:publisher/text())')(root)
+    if publisher:
+        mi.publisher = publisher
+
+def _parse_pubdate(root, mi):
+    year = XPath('number(//fb2:publish-info/fb2:year/text())')(root)
+    if float.is_integer(year):
+        # only year is available, so use 1-st of Jan
+        mi.pubdate = datetime.date(int(year), 1, 1)
+
+def _parse_timestamp(root, mi):
+    #<date value="1996-12-03">03.12.1996</date>
+    xp ='//fb2:document-info/fb2:date/@value|'\
+        '//fb2:document-info/fb2:date/text()'
+    docdate = XPath('string(%s)' % xp)(root)
+    if docdate:
+        mi.timestamp = parse_date(docdate)
+
+def _parse_language(root, mi):
+    language = XPath('string(//fb2:title-info/fb2:lang/text())')(root)
+    if language:
+        mi.language = language
+        mi.languages = [ language ]
+
+def _parse_uuid(root, mi):
+    """ Store the text of <document-info>/<id> in mi.uuid when it is not empty """
+    # every step needs the fb2: prefix: an unprefixed name only matches
+    # elements in no namespace, so '//document-info' found nothing
+    uuid = XPath('normalize-space(//fb2:document-info/fb2:id/text())')(root)
+    if uuid:
+        mi.uuid = uuid
+
+def _get_fbroot(stream):
    parser = etree.XMLParser(recover=True, no_network=True)
    raw = stream.read()
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            assume_utf8=True)[0]
+    raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
    root = etree.fromstring(raw, parser=parser)
-    authors, author_sort = [], None
-    for au in XPath('//fb2:author')(root):
-        fname = lname = author = None
-        fe = XPath('descendant::fb2:first-name')(au)
-        if fe:
-            fname = tostring(fe[0])
-            author = fname
-        le = XPath('descendant::fb2:last-name')(au)
-        if le:
-            lname = tostring(le[0])
-            if author:
-                author += ' '+lname
-            else:
-                author = lname
-        if author:
-            authors.append(author)
-        if len(authors) == 1 and author is not None:
-            if lname:
-                author_sort = lname
-            if fname:
-                if author_sort: author_sort += ', '+fname
-                else: author_sort = fname
-    title = os.path.splitext(os.path.basename(getattr(stream, 'name',
-        _('Unknown'))))[0]
-    for x in XPath('//fb2:book-title')(root):
-        title = tostring(x)
-        break
-    comments = ''
-    for x in XPath('//fb2:annotation')(root):
-        comments += tostring(x)
-    if not comments:
-        comments = None
-    tags = list(map(tostring, XPath('//fb2:genre')(root)))
+    return root

-    cp = XPath('//fb2:coverpage')(root)
-    cdata = None
-    if cp:
-        cimage = XPath('descendant::fb2:image[@xlink:href]')(cp[0])
-        if cimage:
-            id = cimage[0].get(XLINK('href')).replace('#', '')
-            binary = XPath('//fb2:binary[@id="%s"]'%id)(root)
-            if binary:
-                mt = binary[0].get('content-type', 'image/jpeg')
-                exts = guess_all_extensions(mt)
-                if not exts:
-                    exts = ['.jpg']
-                cdata = (exts[0][1:], b64decode(tostring(binary[0])))
-
-    series = None
-    series_index = 1.0
-    for x in XPath('//fb2:sequence')(root):
-        series = x.get('name', None)
-        if series is not None:
-            series_index = x.get('number', 1.0)
-            break
-    mi = MetaInformation(title, authors)
-    mi.comments = comments
-    mi.author_sort = author_sort
-    if tags:
-        mi.tags = tags
-    mi.series = series
-    mi.series_index = series_index
-    if cdata:
-        mi.cover_data = cdata
-    return mi
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@ -591,8 +591,10 @@ class BooksView(QTableView): # {{{
        fmt = prefs['output_format']

        def url_for_id(i):
-            ans = db.format(i, fmt, index_is_id=True, as_path=True,
-                    preserve_filename=True)
+            try:
+                ans = db.format_path(i, fmt, index_is_id=True)
+            except:
+                ans = None
            if ans is None:
                fmts = db.formats(i, index_is_id=True)
                if fmts:
@ -600,13 +602,15 @@ class BooksView(QTableView): # {{{
                else:
                    fmts = []
                for f in fmts:
-                    ans = db.format(i, f, index_is_id=True, as_path=True,
-                            preserve_filename=True)
+                    try:
+                        ans = db.format_path(i, f, index_is_id=True)
+                    except:
+                        ans = None
            if ans is None:
                ans = db.abspath(i, index_is_id=True)
            return QUrl.fromLocalFile(ans)

-        md.setUrls([url_for_id(i) for i in selected[:25]])
+        md.setUrls([url_for_id(i) for i in selected])
        drag = QDrag(self)
        col = self.selectionModel().currentIndex().column()
        md.column_name = self.column_map[col]
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@ -149,7 +149,8 @@ class TagsView(QTreeView): # {{{
                                hidden_categories=self.hidden_categories,
                                search_restriction=None,
                                drag_drop_finished=self.drag_drop_finished,
-                                collapse_model=self.collapse_model)
+                                collapse_model=self.collapse_model,
+                                state_map={})
        self.pane_is_visible = True # because TagsModel.init did a recount
        self.sort_by = sort_by
        self.tag_match = tag_match
@ -173,6 +174,7 @@ class TagsView(QTreeView): # {{{
            self.made_connections = True
        self.refresh_signal_processed = True
        db.add_listener(self.database_changed)
+        self.expanded.connect(self.item_expanded)

    def database_changed(self, event, ids):
        if self.refresh_signal_processed:
@ -541,6 +543,10 @@ class TagsView(QTreeView): # {{{
        return self.isExpanded(idx)

    def recount(self, *args):
+        '''
+        Rebuild the category tree, expand any categories that were expanded,
+        reset the search states, and reselect the current node.
+        '''
        if self.disable_recounting or not self.pane_is_visible:
            return
        self.refresh_signal_processed = True
@ -548,18 +554,23 @@ class TagsView(QTreeView): # {{{
        if not ci.isValid():
            ci = self.indexAt(QPoint(10, 10))
        path = self.model().path_for_index(ci) if self.is_visible(ci) else None
-        try:
-            if not self.model().refresh(): # categories changed!
-                self.set_new_model()
-                path = None
-        except: #Database connection could be closed if an integrity check is happening
-            pass
+        expanded_categories, state_map = self.model().get_state()
+        self.set_new_model(state_map=state_map)
+        for category in expanded_categories:
+            self.expand(self.model().index_for_category(category))
        self._model.show_item_at_path(path)

-    # If the number of user categories changed,  if custom columns have come or
-    # gone, or if columns have been hidden or restored, we must rebuild the
-    # model. Reason: it is much easier than reconstructing the browser tree.
-    def set_new_model(self, filter_categories_by=None):
+    def item_expanded(self, idx):
+        '''
+        Called by the expanded signal
+        '''
+        self.setCurrentIndex(idx)
+
+    def set_new_model(self, filter_categories_by=None, state_map={}):
+        '''
+        There are cases where we need to rebuild the category tree without
+        attempting to reposition the current node.
+        '''
        try:
            old = getattr(self, '_model', None)
            if old is not None:
@ -569,7 +580,8 @@ class TagsView(QTreeView): # {{{
                                    search_restriction=self.search_restriction,
                                    drag_drop_finished=self.drag_drop_finished,
                                    filter_categories_by=filter_categories_by,
-                                    collapse_model=self.collapse_model)
+                                    collapse_model=self.collapse_model,
+                                    state_map=state_map)
            self.setModel(self._model)
        except:
            # The DB must be gone. Set the model to None and hope that someone
@ -752,7 +764,8 @@ class TagsModel(QAbstractItemModel): # {{{

    def __init__(self, db, parent, hidden_categories=None,
            search_restriction=None, drag_drop_finished=None,
-            filter_categories_by=None, collapse_model='disable'):
+            filter_categories_by=None, collapse_model='disable',
+            state_map={}):
        QAbstractItemModel.__init__(self, parent)

        # must do this here because 'QPixmap: Must construct a QApplication
@ -776,10 +789,10 @@ class TagsModel(QAbstractItemModel): # {{{
        self.filter_categories_by = filter_categories_by
        self.collapse_model = collapse_model

-        # get_node_tree cannot return None here, because row_map is empty. Note
-        # that get_node_tree can indirectly change the user_categories dict.
+        # Note that _get_category_nodes can indirectly change the
+        # user_categories dict.

-        data = self.get_node_tree(config['sort_tags_by'])
+        data = self._get_category_nodes(config['sort_tags_by'])
        gst = db.prefs.get('grouped_search_terms', {})
        self.root_item = TagTreeItem(icon_map=self.icon_state_map)
        self.category_nodes = []
@ -844,7 +857,7 @@ class TagsModel(QAbstractItemModel): # {{{
                category_node_map[key] = node
                last_category_node = node
                self.category_nodes.append(node)
-        self.refresh(data=data)
+        self._create_node_tree(data, state_map)

    def break_cycles(self):
        self.root_item.break_cycles()
@ -1121,8 +1134,10 @@ class TagsModel(QAbstractItemModel): # {{{
    def set_search_restriction(self, s):
        self.search_restriction = s

-    def get_node_tree(self, sort):
-        old_row_map_len = len(self.row_map)
+    def _get_category_nodes(self, sort):
+        '''
+        Called by __init__. Do not directly call this method.
+        '''
        self.row_map = []
        self.categories = {}

@ -1176,19 +1191,27 @@ class TagsModel(QAbstractItemModel): # {{{
            if category in data: # The search category can come and go
                self.row_map.append(category)
                self.categories[category] = tb_categories[category]['name']
-
-        if old_row_map_len != 0 and old_row_map_len != len(self.row_map):
-            # A category has been added or removed. We must force a rebuild of
-            # the model
-            return None
        return data

    def refresh(self, data=None):
+        '''
+        Here to trap usages of refresh in the old architecture. Can eventually
+        be removed.
+        '''
+        print 'TagsModel: refresh called!'
+        traceback.print_stack()
+        return False
+
+    def _create_node_tree(self, data, state_map):
+        '''
+        Called by __init__. Do not directly call this method.
+        '''
        sort_by = config['sort_tags_by']
+
        if data is None:
-            data = self.get_node_tree(sort_by) # get category data
-        if data is None:
-            return False
+            print '_create_node_tree: no data!'
+            traceback.print_stack()
+            return

        collapse = gprefs['tags_browser_collapse_at']
        collapse_model = self.collapse_model
@ -1354,29 +1377,23 @@ class TagsModel(QAbstractItemModel): # {{{
        # }}}

        for category in self.category_nodes:
-            if len(category.children) > 0:
-                child_map = category.children
-                states = [c.tag.state for c in category.child_tags()]
-                names = [(c.tag.name, c.tag.category) for c in category.child_tags()]
-                state_map = dict(izip(names, states))
-                # temporary sub-categories (the partitioning ones) must follow
-                # the permanent sub-categories. This will happen naturally if
-                # the temp ones are added by process_node
-                ctags = [c for c in child_map if
-                         c.type == TagTreeItem.CATEGORY and not c.temporary]
-                start = len(ctags)
-                self.beginRemoveRows(self.createIndex(category.row(), 0, category),
-                                     start, len(child_map)-1)
-                category.children = ctags
-                for i in range(start, len(child_map)):
-                    child_map[i].break_cycles()
-                child_map = None
-                self.endRemoveRows()
-            else:
-                state_map = {}
+            process_one_node(category, state_map.get(category.py_name, {}))

-            process_one_node(category, state_map)
-        return True
+    def get_state(self):
+        state_map = {}
+        expanded_categories = []
+        for row, category in enumerate(self.category_nodes):
+            if self.tags_view.isExpanded(self.index(row, 0, QModelIndex())):
+                expanded_categories.append(category.py_name)
+            states = [c.tag.state for c in category.child_tags()]
+            names = [(c.tag.name, c.tag.category) for c in category.child_tags()]
+            state_map[category.py_name] = dict(izip(names, states))
+        return expanded_categories, state_map
+
+    def index_for_category(self, name):
+        for row, category in enumerate(self.category_nodes):
+            if category.py_name == name:
+                return self.index(row, 0, QModelIndex())

    def columnCount(self, parent):
        return 1
@ -1476,7 +1493,7 @@ class TagsModel(QAbstractItemModel): # {{{
            self.tags_view.tag_item_renamed.emit()
            item.tag.name = val
            self.rename_item_in_all_user_categories(name, key, val)
-            self.refresh() # Should work, because no categories can have disappeared
+            self.refresh_required.emit()
        self.show_item_at_path(path)
        return True

@ -1789,19 +1806,22 @@ class TagsModel(QAbstractItemModel): # {{{
                        return v
        return None

-    def show_item_at_path(self, path, box=False):
+    def show_item_at_path(self, path, box=False,
+                          position=QTreeView.PositionAtCenter):
        '''
        Scroll the browser and open categories to show the item referenced by
        path. If possible, the item is placed in the center. If box=True, a
        box is drawn around the item.
        '''
        if path:
-            self.show_item_at_index(self.index_for_path(path), box)
+            self.show_item_at_index(self.index_for_path(path), box=box,
+                                    position=position)

-    def show_item_at_index(self, idx, box=False):
+    def show_item_at_index(self, idx, box=False,
+                           position=QTreeView.PositionAtCenter):
        if idx.isValid():
            self.tags_view.setCurrentIndex(idx)
-            self.tags_view.scrollTo(idx, QTreeView.PositionAtCenter)
+            self.tags_view.scrollTo(idx, position)
            if box:
                tag_item = idx.internalPointer()
                tag_item.boxed = True
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -1144,6 +1144,20 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                    break
        return sha.hexdigest()

+    def format_path(self, index, fmt, index_is_id=False):
+        '''
+        This method is intended to be used only in those rare situations, like
+        Drag'n Drop, when you absolutely need the path to the original file.
+        Otherwise, use format(..., as_path=True).
+
+        Note that a networked backend will always return None.
+        '''
+        path = self.format_abspath(index, fmt, index_is_id=index_is_id)
+        if path is None:
+            id_ = index if index_is_id else self.id(index)
+            raise NoSuchFormat('Record %d has no format: %s'%(id_, fmt))
+        return path
+
    def format_abspath(self, index, format, index_is_id=False):
        '''
        Return absolute path to the ebook file of format `format`
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@ -633,6 +633,7 @@ TXT input supports a number of options to differentiate how paragraphs are detec
    :guilabel:`Formatting Style: None`
        Applies no special formatting to the text, the document is converted to html with no other changes.

+.. _pdfconversion:

 Convert PDF documents
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -35,29 +35,11 @@ What are the best source formats to convert?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 In order of decreasing preference: LIT, MOBI, EPUB, FB2, HTML, PRC, RTF, PDB, TXT, PDF

-Why does the PDF conversion lose some images/tables?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The PDF conversion tries to extract the text and images from the PDF file and convert them to and HTML based ebook. Some PDF files have images in a format that cannot be extracted (vector images). All tables
-are also represented as vector diagrams, thus they cannot be extracted.
+I converted a PDF file, but the result has various problems?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-How do I convert a collection of HTML files in a specific order?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In order to convert a collection of HTML files in a specific oder, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like::
+PDF is a terrible format to convert from. For a list of the various issues you will encounter when converting PDF, see: :ref:`pdfconversion`.

-   <html>
-      <body>
-        <h1>Table of Contents</h1>
-        <p style="text-indent:0pt">
-           <a href="file1.html">First File</a><br/>
-           <a href="file2.html">Second File</a><br/>
-           .
-           .
-           .
-        </p>
-      </body>
-   </html>
-
-Then just add this HTML file to the GUI and use the convert button to create your ebook.

 .. _char-encoding-faq:

@ -85,6 +67,26 @@ If you have a hand edited TOC in the input document, you can use the TOC detecti

 Finally, I encourage you to ditch the content TOC and only have a metadata TOC in your ebooks. Metadata TOCs will give the people reading your ebooks a much superior navigation experience (except on the Kindle, where they are essentially the same as a content TOC).

+How do I convert a collection of HTML files in a specific order?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In order to convert a collection of HTML files in a specific order, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like::
+
+   <html>
+      <body>
+        <h1>Table of Contents</h1>
+        <p style="text-indent:0pt">
+           <a href="file1.html">First File</a><br/>
+           <a href="file2.html">Second File</a><br/>
+           .
+           .
+           .
+        </p>
+      </body>
+   </html>
+
+Then just add this HTML file to the GUI and use the convert button to create your ebook.
+
+
 How do I use some of the advanced features of the conversion tools?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 You can get help on any individual feature of the converters by mousing over it in the GUI or running ``ebook-convert dummy.html .epub -h`` at a terminal. A good place to start is to look at the following demo files that demonstrate some of the advanced features:
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/utils/mem.py
+++ b/src/calibre/utils/mem.py
@ -208,6 +208,8 @@ def gc_histogram():
 def diff_hists(h1, h2):
    """Prints differences between two results of gc_histogram()."""
    for k in h1:
+        if k not in h2:
+            h2[k] = 0
        if h1[k] != h2[k]:
            print "%s: %d -> %d (%s%d)" % (
                k, h1[k], h2[k], h2[k] > h1[k] and "+" or "", h2[k] - h1[k])