Merge from trunk

This commit is contained in:
Charles Haley 2010-07-02 21:38:10 +01:00
commit 502e80c972
32 changed files with 16610 additions and 12550 deletions

View File

@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.foreignpolicy.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ForeignPolicy(BasicNewsRecipe):
    """Recipe that downloads articles from www.foreignpolicy.com.

    The class is purely declarative apart from two small overrides:
    ``print_version`` rewrites an article URL and ``preprocess_html``
    strips inline ``style`` attributes from the parsed page.
    """

    # --- Descriptive metadata -------------------------------------------
    title = 'Foreign Policy'
    __author__ = 'Darko Miletic'
    description = 'International News'
    publisher = 'Washingtonpost.Newsweek Interactive, LLC'
    category = 'news, politics, USA'
    language = 'en'

    # --- Download settings ----------------------------------------------
    # NOTE(review): units/semantics of these settings are defined by the
    # calibre BasicNewsRecipe API, not by this file.
    oldest_article = 31
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # Stylesheet text handed to the base class as-is.
    extra_css = (
        ' body{font-family: Georgia,"Times New Roman",Times,serif }'
        ' img{margin-bottom: 0.4em}'
        ' h1,h2,h3,h4,h5,h6{font-family: Arial,Helvetica,sans-serif} '
    )

    # Re-uses the metadata declared above so the values stay in sync.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up rules --------------------------------------------
    keep_only_tags = [
        {'attrs': {'id': ['art-mast', 'art-body', 'auth-bio']}},
    ]
    remove_tags = [
        {'name': 'iframe'},
        {'attrs': {'id': ['share-box', 'base-ad']}},
    ]
    remove_attributes = ['height', 'width']

    # --- Sources --------------------------------------------------------
    feeds = [
        (u'Articles', u'http://www.foreignpolicy.com/node/feed'),
    ]

    def print_version(self, url):
        """Return ``url`` with the print/full-page query string appended."""
        query = '?print=yes&page=full'
        return url + query

    def preprocess_html(self, soup):
        """Delete the ``style`` attribute from every element that has one.

        ``soup`` is modified in place and also returned.
        """
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

View File

@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1278049615(BasicNewsRecipe):
    """Recipe for the Austin American-Statesman (www.statesman.com).

    Purely declarative: every attribute is a class-level setting consumed
    by the ``BasicNewsRecipe`` base class.
    """

    # --- Descriptive metadata -------------------------------------------
    title = u'Statesman'
    # Was misspelled ``pubisher``, so the value never reached the
    # attribute name the sibling recipes use (``publisher``).
    publisher = 'http://www.statesman.com/'
    description = 'Austin Texas Daily Newspaper'
    category = 'News, Austin, Texas'
    __author__ = 'rty'
    language = 'en'
    masthead_url = "http://www.statesman.com/images/cmg-logo.gif"

    # --- Download settings ----------------------------------------------
    oldest_article = 3
    max_articles_per_feed = 100
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}

    # --- Sources --------------------------------------------------------
    # NOTE(review): the Life and Editorial URLs spell the query parameter
    # differently ("includesubsection" / "includesubsections") from the
    # others ("includeSubSections"). Left untouched because the server's
    # handling cannot be verified from here -- confirm and normalise.
    feeds = [
        (u'News', u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
        (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
        (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
        (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
        (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true'),
    ]

    # --- Page clean-up rules --------------------------------------------
    remove_tags = [
        dict(name='div', attrs={'id': 'cxArticleOptions'}),
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class': 'cxArticleHeader'}),
        dict(name='div', attrs={'id': 'cxArticleBodyText'}),
    ]

View File

@ -82,7 +82,7 @@ class PML2PMLZ(FileTypePlugin):
return of.name
# Metadata reader plugins {{{
class ComicMetadataReader(MetadataReaderPlugin):
name = 'Read comic metadata'
@ -320,7 +320,9 @@ class ZipMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.zip import get_metadata
return get_metadata(stream)
# }}}
# Metadata writer plugins {{{
class EPUBMetadataWriter(MetadataWriterPlugin):
@ -396,6 +398,7 @@ class TOPAZMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.topaz import set_metadata
set_metadata(stream, mi)
# }}}
from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.epub.input import EPUBInput

View File

@ -844,7 +844,7 @@ class Manifest(object):
nroot = etree.fromstring('<html></html>')
has_body = False
for child in list(data):
if barename(child.tag) == 'body':
if isinstance(child.tag, (unicode, str)) and barename(child.tag) == 'body':
has_body = True
break
parent = nroot

View File

@ -63,7 +63,8 @@ class TXTInput(InputFormatPlugin):
raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
else:
html = convert_basic(txt)
flow_size = getattr(options, 'flow_size', 0)
html = convert_basic(txt, epub_split_size_kb=flow_size)
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')

View File

@ -17,13 +17,10 @@ __docformat__ = 'restructuredtext en'
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
def convert_basic(txt, title=''):
lines = []
def convert_basic(txt, title='', epub_split_size_kb=0):
# Strip whitespace from the beginning and end of the line. Also replace
# all line breaks with \n.
for line in txt.splitlines():
lines.append(line.strip())
txt = '\n'.join(lines)
txt = '\n'.join([line.strip() for line in txt.splitlines()])
# Condense redundant spaces
txt = re.sub('[ ]{2,}', ' ', txt)
@ -34,6 +31,15 @@ def convert_basic(txt, title=''):
# Remove excessive line breaks.
txt = re.sub('\n{3,}', '\n\n', txt)
#Takes care if there is no point to split
if epub_split_size_kb > 0:
length_byte = len(txt.encode('utf-8'))
#Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
chunk_size = long(length_byte / (int(length_byte / (epub_split_size_kb * 1024) ) + 2 ))
#if there are chunks with a superior size then go and break
if (len(filter(lambda x: len(x.encode('utf-8')) > chunk_size, txt.split('\n\n')))) :
txt = u'\n\n'.join([split_string_separator(line, chunk_size) for line in txt.split('\n\n')])
lines = []
# Split into paragraphs based on having a blank line between text.
for line in txt.split('\n\n'):
@ -71,3 +77,10 @@ def opf_writer(path, opf_name, manifest, spine, mi):
with open(os.path.join(path, opf_name), 'wb') as opffile:
opf.render(opffile)
def split_string_separator(txt, size):
    """Insert paragraph breaks into ``txt`` when it is larger than ``size``.

    If the UTF-8 encoding of ``txt`` is longer than ``size`` bytes, the text
    is cut into consecutive slices of ``size`` characters and, inside each
    slice, a blank line (``'\\n\\n'``) is inserted after the last ``'.'``.
    Slices that contain no ``'.'`` are left untouched. The slices are then
    concatenated again, so no characters are removed or reordered.

    Note that the threshold is measured in bytes while the slicing is done
    in characters, so for non-ASCII text a slice may still encode to more
    than ``size`` bytes.

    :param txt: unicode text to process.
    :param size: slice length / threshold; values <= 0 disable splitting.
    :return: ``txt`` unchanged, or ``txt`` with extra blank lines inserted.
    """
    # A zero (or negative) size cannot be used as a slice step; the caller
    # can produce 0 for very short input, so treat it as "nothing to split".
    if size <= 0:
        return txt
    if len(txt.encode('utf-8')) <= size:
        return txt

    last_period = u'\\.(?P<ends>[^.]*)$'
    with_break = u'.\n\n\\g<ends>'
    # Iterate over character offsets: slicing is by character, so going up
    # to the (larger) byte length would only yield empty trailing slices.
    pieces = [
        re.sub(last_period, with_break, txt[start:start + size], count=1)
        for start in range(0, len(txt), size)
    ]
    return u''.join(pieces)

View File

@ -162,6 +162,9 @@ turned into a collection on the reader. Note that the PRS-500 does not support c
How do I use |app| with my iPad/iPhone/iTouch?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Over the air
^^^^^^^^^^^^^^
The easiest way to browse your |app| collection on your Apple device (iPad/iPhone/iPod) is by using the *free* Stanza app, available from the Apple app store. You need at least Stanza version 3.0. Stanza allows you to access your |app| collection wirelessly, over the air.
First perform the following steps in |app|
@ -181,13 +184,13 @@ Replace ``192.168.1.2`` with the local IP address of the computer running |app|.
If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the Stanza settings. Go to Info->Settings and increase the value of Download Timeout.
Alternative for the iPad
With the USB cable
^^^^^^^^^^^^^^^^^^^^^^^^^^^
As of |app| version 0.7.0, you can plugin your iPad into the computer using its charging cable, and |app| will detect it and show you a list of books on the iPad. You can then use the Send to device button to send books directly to iBooks on the iPad.
As of |app| version 0.7.0, you can plug your iDevice into the computer using its charging cable, and |app| will detect it and show you a list of books on the device. You can then use the *Send to device* button to send books directly to iBooks on the device. Note that you must have at least iOS 4 installed on your iPhone/iTouch for this to work.
This method only works on Windows XP and higher and OS X 10.5 and higher. Linux is not supported (iTunes is not available in linux) and OS X 10.4 is not supported. For more details, see
`this forum post <http://www.mobileread.com/forums/showpost.php?p=944079&postcount=1>`_.
This method only works on Windows XP and higher and OS X 10.5 and higher. Linux is not supported (iTunes is not available on Linux) and OS X 10.4 is not supported.
For more details on how this works, see `this forum post <http://www.mobileread.com/forums/showpost.php?p=944079&postcount=1>`_.
How do I use |app| with my Android phone?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -65,6 +65,9 @@ class TextLine(object):
self.bottom_margin = bottom_margin
self.font_path = font_path
def __repr__(self):
    """Return a debug string built from ``self.text`` and ``self.font_size``."""
    shown = (self.text, self.font_size)
    return u'TextLine:%r:%f' % shown
def alloc_wand(name):
ans = getattr(p, name)()
if ans < 0:
@ -120,6 +123,10 @@ def draw_centered_text(img, dw, text, top, margin=10):
tokens = text.split(' ')
while tokens:
line, tokens = _get_line(img, dw, tokens, img_width-2*margin)
if not line:
# Could not fit the first token on the line
line = tokens[:1]
tokens = tokens[1:]
bottom = draw_centered_line(img, dw, ' '.join(line), top)
top = bottom
return top