Merge from trunk

2025-07-09 03:04:10 -04:00 · 2010-07-02 21:38:10 +01:00 · 2010-07-02 21:38:10 +01:00 · 502e80c972
commit 502e80c972
parent 430ff00edb f3c723077a
32 changed files with 16610 additions and 12550 deletions
--- a/resources/recipes/foreign_policy.recipe
+++ b/resources/recipes/foreign_policy.recipe
@ -0,0 +1,45 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.foreignpolicy.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ForeignPolicy(BasicNewsRecipe):
+    # Recipe for www.foreignpolicy.com, driven by the site's node feed.
+
+    # --- Publication metadata ---
+    title = 'Foreign Policy'
+    __author__ = 'Darko Miletic'
+    description = 'International News'
+    publisher = 'Washingtonpost.Newsweek Interactive, LLC'
+    category = 'news, politics, USA'
+    language = 'en'
+
+    # --- Download behaviour ---
+    oldest_article = 31
+    max_articles_per_feed = 200
+    no_stylesheets = True
+    encoding = 'utf8'
+    use_embedded_content = False
+    remove_empty_feeds = True
+    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif } img{margin-bottom: 0.4em} h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
+
+    # The conversion options simply reuse the metadata declared above.
+    conversion_options = {
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language,
+    }
+
+    # --- Page clean-up rules (matched by element id / tag name) ---
+    keep_only_tags = [dict(attrs={'id': ['art-mast', 'art-body', 'auth-bio']})]
+    remove_tags = [
+        dict(name='iframe'),
+        dict(attrs={'id': ['share-box', 'base-ad']}),
+    ]
+    remove_attributes = ['height', 'width']
+
+    feeds = [(u'Articles', u'http://www.foreignpolicy.com/node/feed')]
+
+    def print_version(self, url):
+        '''
+        Return ``url`` with the query string that requests the full-page
+        print view appended to it.
+        '''
+        print_query = '?print=yes&page=full'
+        return url + print_query
+
+    def preprocess_html(self, soup):
+        '''
+        Delete the ``style`` attribute from every element of ``soup`` that
+        has one, then return the same ``soup`` object.
+        '''
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
--- a/resources/recipes/statesman.recipe
+++ b/resources/recipes/statesman.recipe
@ -0,0 +1,35 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1278049615(BasicNewsRecipe):
+    # Recipe for the Statesman (Austin, Texas), built from the site's
+    # per-section RSS feeds.
+
+    title = u'Statesman'
+    # Was misspelled ``pubisher``, so the value was never exposed under
+    # the ``publisher`` attribute name.
+    publisher = 'http://www.statesman.com/'
+    description = 'Austin Texas Daily Newspaper'
+    category = 'News, Austin, Texas'
+    __author__ = 'rty'
+    oldest_article = 3
+
+    max_articles_per_feed = 100
+
+    # NOTE(review): the 'Life' and 'Editorial' URLs spell the sub-section
+    # query parameter differently from the other three feeds
+    # ('includesubsection' / 'includesubsections' vs 'includeSubSections').
+    # Left untouched here -- confirm against the site whether it matters.
+    feeds = [
+        (u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
+        (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
+        (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
+        (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
+        (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true'),
+    ]
+    masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
+    #temp_files = []
+    #articles_are_obfuscated = True
+
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    language = 'en'
+    encoding = 'utf-8'
+    conversion_options = {'linearize_tables': True}
+
+    # Drop the article options box; keep only the article header and body.
+    remove_tags = [
+        dict(name='div', attrs={'id': 'cxArticleOptions'}),
+    ]
+    keep_only_tags = [
+        dict(name='div', attrs={'class': 'cxArticleHeader'}),
+        dict(name='div', attrs={'id': 'cxArticleBodyText'}),
+    ]
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -82,7 +82,7 @@ class PML2PMLZ(FileTypePlugin):

        return of.name

-
+# Metadata reader plugins {{{
 class ComicMetadataReader(MetadataReaderPlugin):

    name = 'Read comic metadata'
@ -320,7 +320,9 @@ class ZipMetadataReader(MetadataReaderPlugin):
    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.zip import get_metadata
        return get_metadata(stream)
+# }}}

+# Metadata writer plugins {{{

 class EPUBMetadataWriter(MetadataWriterPlugin):

@ -396,6 +398,7 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.topaz import set_metadata
        set_metadata(stream, mi)

+# }}}

 from calibre.ebooks.comic.input import ComicInput
 from calibre.ebooks.epub.input import EPUBInput
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -844,7 +844,7 @@ class Manifest(object):
                nroot = etree.fromstring('<html></html>')
                has_body = False
                for child in list(data):
-                    if barename(child.tag) == 'body':
+                    if isinstance(child.tag, (unicode, str)) and barename(child.tag) == 'body':
                        has_body = True
                        break
                parent = nroot
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -63,7 +63,8 @@ class TXTInput(InputFormatPlugin):
                raise ValueError('This txt file has malformed markup, it cannot be'
                    ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
        else:
-            html = convert_basic(txt)
+            flow_size = getattr(options, 'flow_size', 0)
+            html = convert_basic(txt, epub_split_size_kb=flow_size)

        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -17,13 +17,10 @@ __docformat__ = 'restructuredtext en'

 HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'

-def convert_basic(txt, title=''):
-    lines = []
+def convert_basic(txt, title='', epub_split_size_kb=0):
    # Strip whitespace from the beginning and end of the line. Also replace
    # all line breaks with \n.
-    for line in txt.splitlines():
-        lines.append(line.strip())
-    txt = '\n'.join(lines)
+    txt = '\n'.join([line.strip() for line in txt.splitlines()])

    # Condense redundant spaces
    txt = re.sub('[ ]{2,}', ' ', txt)
@ -34,6 +31,15 @@ def convert_basic(txt, title=''):
    # Remove excessive line breaks.
    txt = re.sub('\n{3,}', '\n\n', txt)

+    #Takes care if there is no point to split
+    if epub_split_size_kb > 0:
+        length_byte = len(txt.encode('utf-8'))
+        #Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
+        chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024) ) + 2 ))
+        #if there are chunks with a superior size then go and break
+        if (len(filter(lambda x: len(x.encode('utf-8')) > chunk_size, txt.split('\n\n')))) :
+            txt = u'\n\n'.join([split_string_separator(line, chunk_size) for line in txt.split('\n\n')])
+
    lines = []
    # Split into paragraphs based on having a blank line between text.
    for line in txt.split('\n\n'):
@ -71,3 +77,10 @@ def opf_writer(path, opf_name, manifest, spine, mi):
    with open(os.path.join(path, opf_name), 'wb') as opffile:
        opf.render(opffile)

+def split_string_separator(txt, size):
+    '''
+    Insert paragraph breaks into ``txt`` so it can be split into smaller
+    pieces.
+
+    If the UTF-8 encoded length of ``txt`` exceeds ``size``, the text is cut
+    into consecutive slices of ``size`` characters and, within each slice,
+    a blank line is inserted after the last full stop. Slices that contain
+    no full stop are left unchanged.
+
+    :param txt: The unicode text to process.
+    :param size: Slice length. A value of zero or less disables splitting.
+    :return: ``txt``, possibly with ``'\\n\\n'`` inserted after some full stops.
+    '''
+    # A zero step would make the slicing range raise ValueError; the
+    # caller's integer division can yield 0 for very short text.
+    if size <= 0 or len(txt.encode('utf-8')) <= size:
+        return txt
+    # Slicing is by character, so iterate over the character length; using
+    # the byte length here only produced empty trailing slices.
+    pieces = [txt[i:i+size] for i in range(0, len(txt), size)]
+    return u''.join([re.sub(u'\\.(?P<ends>[^.]*)$', u'.\n\n\\g<ends>', piece, 1)
+        for piece in pieces])
+
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -162,6 +162,9 @@ turned into a collection on the reader. Note that the PRS-500 does not support c
 How do I use |app| with my iPad/iPhone/iTouch?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+Over the air
+^^^^^^^^^^^^^^
+
 The easiest way to browse your |app| collection on your Apple device (iPad/iPhone/iPod) is by using the *free* Stanza app, available from the Apple app store. You need at least Stanza version 3.0. Stanza allows you to access your |app| collection wirelessly, over the air.

 First perform the following steps in |app|
@ -181,13 +184,13 @@ Replace ``192.168.1.2`` with the local IP address of the computer running |app|.

 If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout.

-Alternative for the iPad
+With the USB cable
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

-As of |app| version 0.7.0, you can plugin your iPad into the computer using its charging cable, and |app| will detect it and show you a list of books on the iPad. You can then use the Send to device button to send books directly to iBooks on the iPad.
+As of |app| version 0.7.0, you can plug your iDevice into the computer using its charging cable, and |app| will detect it and show you a list of books on the device. You can then use the *Send to device* button to send books directly to iBooks on the device. Note that you must have at least iOS 4 installed on your iPhone/iTouch for this to work.

-This method only works on Windows XP and higher and OS X 10.5 and higher. Linux is not supported (iTunes is not available in linux) and OS X 10.4 is not supported. For more details, see
-`this forum post <http://www.mobileread.com/forums/showpost.php?p=944079&postcount=1>`_.
+This method only works on Windows XP and higher and OS X 10.5 and higher. Linux is not supported (iTunes is not available on Linux) and OS X 10.4 is not supported.
+For more details on how this works, see `this forum post <http://www.mobileread.com/forums/showpost.php?p=944079&postcount=1>`_.

 How do I use |app| with my Android phone?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/he.po
+++ b/src/calibre/translations/he.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/lv.po
+++ b/src/calibre/translations/lv.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt_BR.po
+++ b/src/calibre/translations/pt_BR.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sq.po
+++ b/src/calibre/translations/sq.po
--- a/src/calibre/translations/sr.po
+++ b/src/calibre/translations/sr.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/ta.po
+++ b/src/calibre/translations/ta.po
--- a/src/calibre/translations/tr.po
+++ b/src/calibre/translations/tr.po
--- a/src/calibre/translations/vi.po
+++ b/src/calibre/translations/vi.po
--- a/src/calibre/translations/zh_CN.po
+++ b/src/calibre/translations/zh_CN.po
--- a/src/calibre/translations/zh_TW.po
+++ b/src/calibre/translations/zh_TW.po
--- a/src/calibre/utils/magick_draw.py
+++ b/src/calibre/utils/magick_draw.py
@ -65,6 +65,9 @@ class TextLine(object):
        self.bottom_margin = bottom_margin
        self.font_path = font_path

+    def __repr__(self):
+        # Debugging aid: the repr of the line's text followed by its font
+        # size formatted with %f.
+        shown = (self.text, self.font_size)
+        return u'TextLine:%r:%f' % shown
+
 def alloc_wand(name):
    ans = getattr(p, name)()
    if ans < 0:
@ -120,6 +123,10 @@ def draw_centered_text(img, dw, text, top, margin=10):
    tokens = text.split(' ')
    while tokens:
        line, tokens = _get_line(img, dw, tokens, img_width-2*margin)
+        if not line:
+            # Could not fit the first token on the line
+            line = tokens[:1]
+            tokens = tokens[1:]
        bottom = draw_centered_line(img, dw, ' '.join(line), top)
        top = bottom
    return top