IGN:...

2025-12-12 16:15:03 -05:00 · 2008-09-23 18:18:32 -07:00 · 2008-09-23 18:18:32 -07:00 · f3eebb473f
commit f3eebb473f
parent 63e59dd3be
5 changed files with 144 additions and 11 deletions
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -1,8 +1,37 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Conversion of HTML/OPF files follows several stages:
    * All links in the HTML files or in the OPF manifest are
    followed to build up a list of HTML files to be converted.
    This stage is implemented by 
    :func:`calibre.ebooks.html.traverse` and
    :class:`calibre.ebooks.html.HTMLFile`.
    * The HTML is pre-processed to make it more semantic. 
    All links in the HTML files to other resources like images,
    stylesheets, etc. are relativized. The resources are copied 
    into the `resources` sub directory. This is accomplished by
    :class:`calibre.ebooks.html.PreProcessor` and 
    :class:`calibre.ebooks.html.Parser`.
    * The HTML is processed. Various operations are performed.
    All style declarations are extracted and consolidated into 
    a single style sheet. Chapters are auto-detected and marked.
    Various font related manipulations are performed. See
    :class:`HTMLProcessor`.
    * The processed HTML is saved and the 
    :mod:`calibre.ebooks.epub.split` module is used to split up
    large HTML files into smaller chunks.
    * The EPUB container is created.
 '''
 import os, sys, re, cStringIO
 from lxml.etree import XPath
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -463,6 +463,9 @@ class Processor(Parser):
        return Parser.save(self)
    def populate_toc(self, toc):
        '''
        Populate the Table of Contents from detected chapters and links.
        '''
        def add_item(href, fragment, text, target, type='link'):
            for entry in toc.flat():
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -17,11 +17,41 @@ E-book Format Conversion
 What formats does |app| support conversion to/from?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| supports the conversion of the following formats to LRF: HTML, LIT, MOBI, PRC, EPUB, CBR, CBZ, RTF, TXT, PDF and LRS. It also supports the conversion of LRF to LRS and HTML(forthcoming). Note that calibre does not support the conversion of DRMed ebooks.
+|app| supports the conversion of the following formats:
-What are the best formats to convert to LRF?
+----------------------------+------------------------------------------+
 |                            |          **Output formats**              |
 |                            +------------------+-----------------------+
 |                            |      EPUB        |         LRF           |
 +===================+========+==================+=======================+      
 |                   |  MOBI  |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  LIT   |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  PRC   |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  EPUB  |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  HTML  |       ✔          |          ✔            |
 |                   |        |                  |                       |
 | **Input formats** |  CBR   |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  CBZ   |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  RTF   |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  TXT   |       ✔          |          ✔            |
 |                   |        |                  |                       |
 |                   |  PDF   |       ✔          |          ✔            | 
 |                   |        |                  |                       |
 |                   |  LRS   |                  |          ✔            |
 +-------------------+--------+------------------+-----------------------+
 What are the best source formats to convert?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In order of decreasing preference: LIT, MOBI, HTML, PRC, RTF, TXT, PDF 
+In order of decreasing preference: LIT, MOBI, EPUB, HTML, PRC, RTF, TXT, PDF 
 Why does the PDF conversion lose some images/tables?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/src/calibre/web/feeds/recipes/discover_magazine.py
+++ b/src/calibre/web/feeds/recipes/discover_magazine.py
@ -0,0 +1,33 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 discovermagazine.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class DiscoverMagazine(BasicNewsRecipe):
     # Recipe definition for Discover Magazine: static metadata, two
     # numeric limits and the list of RSS feeds to download. All of the
     # behaviour comes from BasicNewsRecipe; this class only sets values.
     #
     # Metadata shown for the recipe.
     title = u'Discover Magazine'
     description = u'Science, Technology and the Future'
     __author__ = 'Mike Diaz'
     # Download limits. NOTE(review): units/semantics are defined by
     # BasicNewsRecipe (presumably age in days and a per-feed article
     # cap) -- confirm against the base class.
     oldest_article = 33
     max_articles_per_feed = 20
     # Each entry is a (feed title, feed URL) pair.
     feeds = [
         (u'Technology',
          u'http://discovermagazine.com/topics/technology/rss.xml'),
         (u'Health - Medicine',
          u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
         (u'Mind Brain',
          u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
         (u'Space',
          u'http://discovermagazine.com/topics/space/rss.xml'),
         (u'Human Origins',
          u'http://discovermagazine.com/topics/human-origins/rss.xml'),
         (u'Living World',
          u'http://discovermagazine.com/topics/living-world/rss.xml'),
         (u'Environment',
          u'http://discovermagazine.com/topics/environment/rss.xml'),
         (u'Physics & Math',
          u'http://discovermagazine.com/topics/physics-math/rss.xml'),
         (u'Vital Signs',
          u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
         (u"20 Things you didn't know about...",
          u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
         (u'Fuzzy Math',
          u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
         (u'The Brain',
          u'http://discovermagazine.com/columns/the-brain/rss.xml'),
         (u'Stupid Science Word of the Month',
          u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'),
         (u'Science Not Fiction',
          u'http://blogs.discovermagazine.com/sciencenotfiction/wp-rss.php'),
     ]
--- a/src/calibre/web/feeds/recipes/scientific_american.py
+++ b/src/calibre/web/feeds/recipes/scientific_american.py
@ -0,0 +1,38 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 sciam.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class ScientificAmerican(BasicNewsRecipe):
     # Recipe definition for Scientific American: static metadata,
     # download limits, page clean-up selectors, converter options and
     # the list of RSS feeds. All of the behaviour comes from
     # BasicNewsRecipe; this class only sets values.
     #
     # Metadata shown for the recipe.
     title = u'Scientific American'
     description = u'Popular science'
     __author__ = 'Kovid Goyal'
     # Download limits. NOTE(review): units/semantics are defined by
     # BasicNewsRecipe (presumably age in days and a per-feed article
     # cap) -- confirm against the base class.
     oldest_article = 30
     max_articles_per_feed = 100
     # NOTE(review): presumably makes the recipe fetch the linked article
     # pages rather than using the feed's own content -- confirm.
     use_embedded_content = False
     # Tag selectors consumed by the base class. NOTE(review): presumably
     # content before the 'headline' div and after the element with
     # id 'article' is dropped, and the listed ids are removed -- confirm.
     remove_tags_before = {'name': 'div', 'attrs': {'class': 'headline'}}
     remove_tags_after = {'id': 'article'}
     remove_tags = [{'id': 'sharetools'}, {'id': 'reddit'}]
     # Extra command-line style options handed to the LRF conversion.
     html2lrf_options = ['--base-font-size', '8']
     # Each entry is a (feed title, feed URL) pair.
     feeds = [
         (u'Latest News',
          u'http://rss.sciam.com/ScientificAmerican-News'),
         (u'Global',
          u'http://rss.sciam.com/ScientificAmerican-Global'),
         (u'Health',
          u'http://rss.sciam.com/sciam/health'),
         (u'Space',
          u'http://rss.sciam.com/sciam/space'),
         (u'Technology',
          u'http://rss.sciam.com/sciam/technology'),
         (u'Biology',
          u'http://rss.sciam.com/sciam/biology'),
         (u'Mind & Brain',
          u'http://rss.sciam.com/sciam/mind-and-brain'),
         (u"What's Next",
          u'http://rss.sciam.com/sciam/whats-next'),
         (u'Archeology and Paleontology',
          u'http://www.sciam.com/page.cfm?section=rsscategory&alias=archaeology-and-paleontology'),
         (u'Physics',
          u'http://www.sciam.com/page.cfm?section=rsscategory&alias=physics'),
         (u'Math',
          u'http://rss.sciam.com/sciam/math'),
         (u'History of Science',
          u'http://www.sciam.com/page.cfm?section=rsscategory&alias=history-of-science'),
         (u'Chemistry',
          u'http://rss.sciam.com/sciam/chemistry'),
         (u'Mind Matters',
          u'http://rss.sciam.com/ScientificAmerican-MindBlog'),
     ]