mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)

commit f6f4b4782d: Merge from trunk
@@ -58,7 +58,7 @@
 <input type="hidden" name="cmd" value="_s-xclick"></input>
 <input type="hidden" name="hosted_button_id" value="3028915"></input>
 <input type="image"
-    src="http://calibre-ebook.com/site_media//img/button-donate.png"
+    src="/static/button-donate.png"
     name="submit"></input>
 <img alt="" src="https://www.paypal.com/en_US/i/scr/pixel.gif"
 width="1" height="1"></img>
BIN resources/content_server/button-donate.png (new file, 1.6 KiB; binary file not shown)
@@ -203,3 +203,11 @@ content_server_wont_display = ['']
 # level sorts, and if you are seeing a slowdown, reduce the value of this tweak.
 maximum_resort_levels = 5
 
+# Absolute path to a TTF font file to use as the font for the title and author
+# when generating a default cover. Useful if the default font (Liberation
+# Serif) does not contain glyphs for the language of the books in your library.
+generate_cover_title_font = None
+
+# Absolute path to a TTF font file to use as the font for the footer in the
+# default cover
+generate_cover_foot_font = None
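For illustration only, a library in a language not covered by Liberation Serif could point the new tweak at any TTF font on disk; the path below is a hypothetical example, not part of this commit:

    generate_cover_title_font = '/usr/share/fonts/truetype/mycjkfont.ttf'  # hypothetical path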
@@ -1,9 +1,7 @@
 #!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
-newyorker.com
+cubadebate.cu
 '''
-
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -14,31 +12,43 @@ class CubaDebate(BasicNewsRecipe):
     description = 'Contra el Terorismo Mediatico'
     oldest_article = 15
     language = 'es'
-
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
     publisher = 'Cubadebate'
     category = 'news, politics, Cuba'
     encoding = 'utf-8'
-    extra_css = ' #BlogTitle{font-size: x-large; font-weight: bold} '
     masthead_url = 'http://www.cubadebate.cu/wp-content/themes/cubadebate/images/logo.gif'
+    publication_type = 'newsportal'
+    extra_css = """
+        #BlogTitle{font-size: xx-large; font-weight: bold}
+        body{font-family: Verdana, Arial, Tahoma, sans-serif}
+    """
 
     conversion_options = {
         'comments'     : description
         ,'tags'        : category
-        ,'language'    : 'es'
+        ,'language'    : language
         ,'publisher'   : publisher
+        ,'pretty_print': True
     }
 
     keep_only_tags = [dict(name='div', attrs={'id':'Outline'})]
+    remove_tags_after = dict(name='div',attrs={'id':'BlogContent'})
-    remove_tags = [dict(name='link')]
+    remove_tags = [
+        dict(name=['link','base','embed','object','meta','iframe'])
+        ,dict(attrs={'id':'addthis_container'})
+    ]
 
     feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
+    remove_attributes=['width','height','lang']
 
     def print_version(self, url):
         return url + 'print/'
 
     def preprocess_html(self, soup):
-        return self.adeify_images(soup)
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
64 resources/recipes/ming_pao.recipe (new file)
@@ -0,0 +1,64 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Eddie Lau'
+'''
+modified from Singtao Toronto calibre recipe by rty
+'''
+
+import datetime
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1278063072(BasicNewsRecipe):
+    title = 'Ming Pao - Hong Kong'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    __author__ = 'Eddie Lau'
+    description = 'Hong Kong Chinese Newspaper'
+    publisher = 'news.mingpao.com'
+    category = 'Chinese, News, Hong Kong'
+    remove_javascript = True
+    use_embedded_content = False
+    no_stylesheets = True
+    language = 'zh'
+    encoding = 'Big5-HKSCS'
+    recursions = 0
+    conversion_options = {'linearize_tables':True}
+    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
+
+    keep_only_tags = [dict(name='h1'),
+                      dict(attrs={'id':['newscontent01','newscontent02']})]
+
+    def get_fetchdate(self):
+        dt_utc = datetime.datetime.utcnow()
+        # convert UTC to local hk time
+        dt_local = dt_utc - datetime.timedelta(-8.0/24)
+        return dt_local.strftime("%Y%m%d")
+
+    def parse_index(self):
+        feeds = []
+        dateStr = self.get_fetchdate()
+        for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def parse_section(self, url):
+        dateStr = self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        divs = soup.findAll(attrs={'class': ['bullet']})
+        current_articles = []
+        for i in divs:
+            a = i.find('a', href = True)
+            title = self.tag_to_string(a)
+            url = a.get('href', False)
+            url = 'http://news.mingpao.com/' + dateStr + '/' +url
+            current_articles.append({'title': title, 'url': url, 'description':''})
+        return current_articles
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll(width=True):
+            del item['width']
+        return soup
@@ -4,136 +4,57 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
-V5 - One picture per article, moved to top:
-    Headline
-    Image
-    Byline
-    Story
 '''
-import re, string, time
+import string, re, time
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+def decode(self, src):
+    enc = 'utf-8'
+    if 'iso-8859-1' in src:
+        enc = 'cp1252'
+    return src.decode(enc, 'ignore')
 
 class NYTimes(BasicNewsRecipe):
 
-    title = 'The New York Times'
-    __author__ = 'GRiker'
+    title = u'New York Times'
+    __author__ = 'Kovid Goyal/Nick Redding'
     language = 'en'
-    requires_version = (0, 7, 5)
+    requires_version = (0, 6, 36)
 
     description = 'Daily news from the New York Times (subscription version)'
-    allSectionKeywords = ['The Front Page', 'International','National','Obituaries','Editorials',
-                          'New York','Business Day','Science Times','Sports','Dining','Arts',
-                          'Home','Styles','Sunday Business','Week In Review','Travel','Magazine',
-                          'Book Review','Weddings','Real Estate','Automobiles',"T Men's Fashion",
-                          "T Women's Fashion"]
-
-    # List of sections to exclude
-    # To add a section, copy the section name from the allSectionKeywords list above
-    # For example, to exclude 'Dining' and 'Weddings':
-    #excludeSectionKeywords = ['Dining','Weddings']
-    excludeSectionKeywords = []
-
-    # List of sections to include (test and debug only)
-    # By default, any sections in today's paper that are not listed in excludeSectionKeywords
-    # are downloaded. fetch_only specifies that only certain sections are to be downloaded.
-    # This should only be used for testing and debugging.
-    # For example, to download only 'The Front Page' section:
-    # fetch_only = set(['The Front Page'])
-    fetch_only = set([])
-    if fetch_only:
-        excludeSectionKeywords = list(set(allSectionKeywords) ^ fetch_only)
-
-    # one_picture_per_article specifies that calibre should only use the first image
-    # from an article (if one exists). If one_picture_per_article = True, the image
-    # will be moved to a location between the headline and the byline.
-    # If one_picture_per_article = False, all images from the article will be included
-    # and shown in their original location.
-    one_picture_per_article = True
-
-    timefmt = ''
+    timefmt = ' [%b %d]'
     needs_subscription = True
-    remove_tags_before = dict(id='article')
-    remove_tags_after = dict(id='article')
-    remove_tags = [dict(attrs={'class':[
-        'articleFooter',
-        'articleTools',
-        'columnGroup doubleRule',
-        'columnGroup singleRule',
-        'columnGroup last',
-        'columnGroup last',
-        'doubleRule',
-        'dottedLine',
-        'entry-meta',
-        'entry-response module',
-        'icon enlargeThis',
-        'leftNavTabs',
-        'module box nav',
-        'nextArticleLink',
-        'nextArticleLink clearfix',
-        'post-tools',
-        'relatedSearchesModule',
-        'side_tool',
-        'singleAd',
-        'subNavigation clearfix',
-        'subNavigation tabContent active',
-        'subNavigation tabContent active clearfix',
-        ]}),
-        dict(id=[
-            'adxLeaderboard',
-            'archive',
-            'articleExtras',
-            'articleInline',
-            'blog_sidebar',
-            'businessSearchBar',
-            'cCol',
-            'entertainmentSearchBar',
-            'footer',
-            'header',
-            'header_search',
-            'login',
-            'masthead',
-            'masthead-nav',
-            'memberTools',
-            'navigation',
-            'portfolioInline',
-            'relatedArticles',
-            'respond',
-            'side_search',
-            'side_index',
-            'side_tool',
-            'toolsRight',
-            ]),
-        dict(name=['script', 'noscript', 'style'])]
+    masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+    cover_margins = (18,18,'grey99')
+    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool','nextArticleLink',
+        'nextArticleLink clearfix','columnGroup doubleRule','doubleRule','entry-meta',
+        'icon enlargeThis','columnGroup last','relatedSearchesModule']}),
+        dict({'class':re.compile('^subNavigation')}),
+        dict({'class':re.compile('^leaderboard')}),
+        dict({'class':re.compile('^module')}),
+        dict({'class':'metaFootnote'}),
+        dict(id=['inlineBox','footer', 'toolsRight', 'articleInline','login','masthead',
+            'navigation', 'archive', 'side_search', 'blog_sidebar','cCol','portfolioInline',
+            'side_tool', 'side_index','header','readerReviewsCount','readerReviews',
+            'relatedArticles', 'relatedTopics', 'adxSponLink']),
+        dict(name=['script', 'noscript', 'style','form','hr'])]
+    encoding = decode
     no_stylesheets = True
-    extra_css = '.headline {text-align: left;}\n \
-        .byline {font-family: monospace; \
-            text-align: left; \
-            margin-top: 0px; \
-            margin-bottom: 0px;}\n \
-        .dateline {font-size: small; \
-            margin-top: 0px; \
-            margin-bottom: 0px;}\n \
-        .timestamp {font-size: small; \
-            margin-top: 0px; \
-            margin-bottom: 0px;}\n \
-        .source {text-align: left;}\n \
-        .image {text-align: center;}\n \
-        .credit {text-align: right; \
-            font-size: small; \
-            margin-top: 0px; \
-            margin-bottom: 0px;}\n \
-        .articleBody {text-align: left;}\n \
-        .authorId {text-align: left; \
-            font-style: italic;}\n '
+    extra_css = '''
+        .articleHeadline { margin-top:0.5em; margin-bottom:0.25em; }
+        .credit { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .byline { font-size: small; font-style:italic; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .dateline { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        .timestamp { font-size: small; }
+        .caption { font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+        a:link {text-decoration: none; }'''
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
             try:
                 br.open('http://www.nytimes.com/auth/login')
                 br.select_form(name='login')
                 br['USERID'] = self.username
@@ -142,11 +63,20 @@ class NYTimes(BasicNewsRecipe):
                 if 'Sorry, we could not find the combination you entered. Please try again.' in raw:
                     raise Exception('Your username and password are incorrect')
                 #open('/t/log.html', 'wb').write(raw)
             except:
                 self.log("\nFailed to login")
 
         return br
 
+    def get_masthead_url(self):
+        masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+        #masthead = 'http://members.cox.net/nickredding/nytlogo.gif'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(masthead)
+        except:
+            self.log("\nMasthead unavailable")
+            masthead = None
+        return masthead
+
 
     def get_cover_url(self):
         cover = None
         st = time.localtime()
@@ -162,316 +92,101 @@ class NYTimes(BasicNewsRecipe):
             cover = None
         return cover
 
     def get_masthead_title(self):
         return self.title
 
     def dump_ans(self, ans):
         total_article_count = 0
         for section in ans :
             if self.verbose:
                 self.log("section %s: %d articles" % (section[0], len(section[1])) )
             for article in section[1]:
                 total_article_count += 1
                 if self.verbose:
                     self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('mac-roman','replace'),
                         article['url'].encode('mac-roman','replace')))
         self.log( "Queued %d articles" % total_article_count )
 
     def dump_hex(self, src, length=16):
         ''' Diagnostic '''
         FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
         N=0; result=''
         while src:
             s,src = src[:length],src[length:]
             hexa = ' '.join(["%02X"%ord(x) for x in s])
             s = s.translate(FILTER)
             result += "%04X %-*s %s\n" % (N, length*3, hexa, s)
             N+=length
         print result
 
     def fixChars(self,string):
         # Replace lsquo (\x91)
         fixed = re.sub("\x91","&#8216;",string)
 
         # Replace rsquo (\x92)
         fixed = re.sub("\x92","&#8217;",fixed)
 
         # Replace ldquo (\x93)
         fixed = re.sub("\x93","&#8220;",fixed)
 
         # Replace rdquo (\x94)
         fixed = re.sub("\x94","&#8221;",fixed)
 
         # Replace ndash (\x96)
         fixed = re.sub("\x96","&#8211;",fixed)
 
         # Replace mdash (\x97)
         fixed = re.sub("\x97","&#8212;",fixed)
 
         return fixed
 
     def massageNCXText(self, description):
         # Kindle TOC descriptions won't render certain characters
         if description:
             massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
             # Replace '&' with '&#38;'
             massaged = re.sub("&","&#38;", massaged)
             return self.fixChars(massaged)
         else:
             return description
 
     def short_title(self):
         return 'New York Times'
 
     def parse_index(self):
         self.encoding = 'cp1252'
         soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
         self.encoding = decode
 
         def feed_title(div):
             return ''.join(div.findAll(text=True, recursive=False)).strip()
             return ''.join(div.findAll(text=True, recursive=True)).strip()
 
         articles = {}
         key = None
         ans = []
         # Find each instance of class="section-headline", class="story", class="story headline"
         for div in soup.findAll(True,
                 attrs={'class':['section-headline', 'story', 'story headline']}):
             url_list = []
 
             if div['class'] == 'section-headline':
                 key = string.capwords(feed_title(div))
                 if self.excludeSectionKeywords:
                     excluded = re.compile('|'.join(self.excludeSectionKeywords))
                     if excluded.search(key):
                         self.log("Skipping section %s" % key)
                         continue
                 articles[key] = []
                 ans.append(key)
 
             elif div['class'] in ['story', 'story headline'] :
         def handle_article(div):
             a = div.find('a', href=True)
             if not a:
                 continue
                 return
             url = re.sub(r'\?.*', '', a['href'])
             if not url.startswith("http"):
                 return
             if not url.endswith(".html"):
                 return
             if 'podcast' in url:
                 return
             url += '?pagewanted=all'
 
             title = self.massageNCXText(self.tag_to_string(a, use_alt=True).strip())
 
             if url in url_list:
                 return
             url_list.append(url)
             title = self.tag_to_string(a, use_alt=True).strip()
             #self.log("Title: %s" % title)
             description = ''
             pubdate = strftime('%a, %d %b')
             summary = div.find(True, attrs={'class':'summary'})
             if summary:
                 description = self.massageNCXText(self.tag_to_string(summary, use_alt=False))
 
                 description = self.tag_to_string(summary, use_alt=False)
             author = ''
             authorAttribution = div.find(True, attrs={'class':'storyheadline-author'})
             authorAttribution = div.find(True, attrs={'class':'byline'})
             if authorAttribution:
                 author = self.tag_to_string(authorAttribution, use_alt=False)
             else:
                 authorAttribution = div.find(True, attrs={'class':'byline'})
                 if authorAttribution:
                     author = self.tag_to_string(authorAttribution, use_alt=False)
             # Kill commas - Kindle switches to '&'
             author = re.sub(',','',author)
 
             feed = key if key is not None else 'Uncategorized'
             if not articles.has_key(feed):
                 articles[feed] = []
             if not 'podcasts' in url:
                 articles[feed].append(
                     dict(title=title, url=url, date=pubdate,
                         description=description, author=author,
                         content=''))
         ans = self.sort_index_by(ans, {'The Front Page':-1,
             'Dining In, Dining Out':1,
             'Obituaries':2})
 
         # Find each instance of class="section-headline", class="story", class="story headline"
         for div in soup.findAll(True,
                 attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
 
             if div['class'] in ['section-headline','sectionHeader']:
                 key = string.capwords(feed_title(div))
                 articles[key] = []
                 ans.append(key)
                 #self.log('Section: %s' % key)
 
             elif div['class'] in ['story', 'story headline'] :
                 handle_article(div)
             elif div['class'] == 'headlinesOnly multiline flush':
                 for lidiv in div.findAll('li'):
                     handle_article(lidiv)
 
         # ans = self.sort_index_by(ans, {'The Front Page':-1,
         #     'Dining In, Dining Out':1,
         #     'Obituaries':2})
         ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
         self.dump_ans(ans)
 
         return ans
 
     def skip_ad_pages(self, soup):
         # Skip ad pages served before actual article
         skip_tag = soup.find(True, {'name':'skip'})
         if skip_tag is not None:
             self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
             url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
             url += '?pagewanted=all'
             self.log.warn("Skipping ad to article at '%s'" % url)
             return self.index_to_soup(url, raw=True)
 
     def preprocess_html(self, soup):
         return self.strip_anchors(soup)
 
     def postprocess_html(self,soup, True):
         print "\npostprocess_html()\n"
 
         if self.one_picture_per_article:
             # Remove all images after first
             largeImg = soup.find(True, {'class':'articleSpanImage'})
             inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
             if largeImg:
                 for inlineImg in inlineImgs:
                     inlineImg.extract()
             else:
                 if inlineImgs:
                     firstImg = inlineImgs[0]
                     for inlineImg in inlineImgs[1:]:
                         inlineImg.extract()
                     # Move firstImg after headline
                     cgFirst = soup.find(True, {'class':'columnGroup first'})
                     if cgFirst:
                         # Strip all sibling NavigableStrings: noise
                         navstrings = cgFirst.findAll(text=True, recursive=False)
                         [ns.extract() for ns in navstrings]
                         headline_found = False
                         tag = cgFirst.find(True)
                         insertLoc = 0
                         while True:
                             insertLoc += 1
                             if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
                                 headline_found = True
                                 break
                             tag = tag.nextSibling
                             if not tag:
                                 headline_found = False
                                 break
                         if headline_found:
                             cgFirst.insert(insertLoc,firstImg)
                     else:
                         self.log(">>> No class:'columnGroup first' found <<<")
         # Change class="kicker" to <h3>
         kicker = soup.find(True, {'class':'kicker'})
         if kicker and kicker.contents and kicker.contents[0]:
             h3Tag = Tag(soup, "h3")
             h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
                 use_alt=False)))
             kicker.replaceWith(h3Tag)
 
         # Change captions to italic -1
         for caption in soup.findAll(True, {'class':'caption'}) :
             if caption and caption.contents[0]:
                 emTag = Tag(soup, "em")
                 c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                 mp_off = c.find("More Photos")
                 if mp_off >= 0:
                     c = c[:mp_off]
                 emTag.insert(0, c)
                 #hrTag = Tag(soup, 'hr')
                 #hrTag['class'] = 'caption_divider'
                 hrTag = Tag(soup, 'div')
                 hrTag['class'] = 'divider'
                 emTag.insert(1, hrTag)
                 caption.replaceWith(emTag)
 
         # Change <nyt_headline> to <h2>
         h1 = soup.find('h1')
         if h1:
             headline = h1.find("nyt_headline")
             if headline:
                 tag = Tag(soup, "h2")
                 tag['class'] = "headline"
                 tag.insert(0, self.fixChars(headline.contents[0]))
                 h1.replaceWith(tag)
         else:
             # Blog entry - replace headline, remove <hr> tags
             headline = soup.find('title')
             if headline:
                 tag = Tag(soup, "h2")
                 tag['class'] = "headline"
                 tag.insert(0, self.fixChars(headline.contents[0]))
                 soup.insert(0, tag)
                 hrs = soup.findAll('hr')
                 for hr in hrs:
                     hr.extract()
 
         # Change <h1> to <h3> - used in editorial blogs
         masthead = soup.find("h1")
         if masthead:
             # Nuke the href
             if masthead.a:
                 del(masthead.a['href'])
             tag = Tag(soup, "h3")
             tag.insert(0, self.fixChars(masthead.contents[0]))
             masthead.replaceWith(tag)
 
         # Change <span class="bold"> to <b>
         for subhead in soup.findAll(True, {'class':'bold'}) :
             if subhead.contents:
                 bTag = Tag(soup, "b")
                 bTag.insert(0, subhead.contents[0])
                 subhead.replaceWith(bTag)
 
         # Synthesize a section header
         dsk = soup.find('meta', attrs={'name':'dsk'})
         if dsk and dsk.has_key('content'):
             hTag = Tag(soup,'h3')
             hTag['class'] = 'section'
             hTag.insert(0,NavigableString(dsk['content']))
             articleTag = soup.find(True, attrs={'id':'article'})
             if articleTag:
                 articleTag.insert(0,hTag)
 
         # Add class="articleBody" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'articleBody'})
         if divTag:
             divTag['class'] = divTag['id']
 
         # Add class="authorId" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'authorId'})
         if divTag and divTag.contents[0]:
             tag = Tag(soup, "p")
             tag['class'] = "authorId"
             tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
                 use_alt=False)))
             divTag.replaceWith(tag)
 
         kicker_tag = soup.find(attrs={'class':'kicker'})
         if kicker_tag:
             tagline = self.tag_to_string(kicker_tag)
             #self.log("FOUND KICKER %s" % tagline)
             if tagline=='Op-Ed Columnist':
                 img_div = soup.find('div','inlineImage module')
                 #self.log("Searching for photo")
                 if img_div:
                     img_div.extract()
                     #self.log("Photo deleted")
         refresh = soup.find('meta', {'http-equiv':'refresh'})
         if refresh is None:
             return soup
         content = refresh.get('content').partition('=')[2]
         raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
         return BeautifulSoup(raw.decode('cp1252', 'replace'))
 
     def populate_article_metadata(self,article,soup,first):
         '''
         Extract author and description from article, add to article metadata
         '''
         def extract_author(soup):
             byline = soup.find('meta',attrs={'name':['byl','CLMST']})
             if byline :
                 author = byline['content']
             else :
                 # Try for <div class="byline">
                 byline = soup.find('div', attrs={'class':'byline'})
                 if byline:
                     author = byline.renderContents()
                 else:
                     print soup.prettify()
                     return None
             return author
 
         def extract_description(soup):
             description = soup.find('meta',attrs={'name':['description','description ']})
             if description :
                 return self.massageNCXText(description['content'])
             else:
                 # Take first paragraph of article
                 articlebody = soup.find('div',attrs={'id':'articlebody'})
                 if not articlebody:
                     # Try again with class instead of id
                     articlebody = soup.find('div',attrs={'class':'articlebody'})
                     if not articlebody:
                         print 'postprocess_book.extract_description(): Did not find <div id="articlebody">:'
                         print soup.prettify()
                         return None
                 paras = articlebody.findAll('p')
                 for p in paras:
                     if p.renderContents() > '' :
                         return self.massageNCXText(self.tag_to_string(p,use_alt=False))
                 return None
 
         if not article.author:
             article.author = extract_author(soup)
         if not article.summary:
             article.summary = article.text_summary = extract_description(soup)
 
     def strip_anchors(self,soup):
         paras = soup.findAll(True)
         for para in paras:
             aTags = para.findAll('a')
             for a in aTags:
                 if a.img is None:
                     a.replaceWith(a.renderContents().decode('utf-8','replace'))
                     #a.replaceWith(a.renderContents().decode('cp1252','replace'))
         return soup
@@ -54,7 +54,7 @@ reflow_error = poppler_error if poppler_error else magick_error
 
 pdfreflow_libs = []
 if iswindows:
-    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
+    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32', 'zlib']
 
 extensions = [
 
@@ -213,7 +213,7 @@ It contains correct fonts.conf etc.
 poppler
 -------------
 
-In Cmake: disable GTK, Qt, OPenjpeg, zlib, lcms, gtk_tests, qt_tests. Enable qt4, jpeg, png and zlib
+In Cmake: disable GTK, Qt, OPenjpeg, cpp, lcms, gtk_tests, qt_tests. Enable qt4, jpeg, png and zlib
 
 NOTE: poppler must be built as a static library, unless you build the qt4 bindings
 
@@ -4,6 +4,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import sys
 from itertools import izip
 from xml.sax.saxutils import escape
 
@@ -417,6 +418,13 @@ class iPadOutput(OutputProfile):
     '''
     # }}}
 
+class TabletOutput(iPadOutput):
+    name = 'Tablet'
+    short_name = 'tablet'
+    description = _('Intended for generic tablet devices, does no resizing of images')
+
+    screen_size = (sys.maxint, sys.maxint)
+    comic_screen_size = (sys.maxint, sys.maxint)
+
 class SonyReaderOutput(OutputProfile):
 
@@ -664,7 +672,7 @@ class BambookOutput(OutputProfile):
 output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
     SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
     HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
-    iPadOutput, KoboReaderOutput,
+    iPadOutput, KoboReaderOutput, TabletOutput,
     SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
     IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
     BambookOutput, ]
@@ -20,7 +20,8 @@ class ANDROID(USBMS):
     VENDOR_ID = {
         # HTC
         0x0bb4 : { 0x0c02 : [0x100, 0x0227], 0x0c01 : [0x100, 0x0227], 0x0ff9
-            : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226]},
+            : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
+            0xc92 : [0x100]},
 
         # Motorola
         0x22b8 : { 0x41d9 : [0x216], 0x2d67 : [0x100], 0x41db : [0x216],
@@ -22,7 +22,9 @@ class KOBO(USBMS):
     gui_name = 'Kobo Reader'
     description = _('Communicate with the Kobo Reader')
     author = 'Timothy Legge and Kovid Goyal'
-    version = (1, 0, 6)
+    version = (1, 0, 7)
+
+    dbversion = 0
 
     supported_platforms = ['windows', 'osx', 'linux']
 
@@ -92,7 +94,7 @@ class KOBO(USBMS):
             if lpath.startswith(os.sep):
                 lpath = lpath[len(os.sep):]
             lpath = lpath.replace('\\', '/')
-            # print "LPATH: " + lpath
+            # debug_print("LPATH: ", lpath, " - Title: " , title)
 
             playlist_map = {}
 
@@ -112,7 +114,7 @@ class KOBO(USBMS):
                 #print "Image name Normalized: " + imagename
                 if imagename is not None:
                     bl[idx].thumbnail = ImageWrapper(imagename)
-                if ContentType != '6':
+                if (ContentType != '6'and self.dbversion < 8) or (self.dbversion >= 8):
                     if self.update_metadata_item(bl[idx]):
                         # print 'update_metadata_item returned true'
                         changed = True
@@ -120,10 +122,16 @@ class KOBO(USBMS):
                     playlist_map[lpath] not in bl[idx].device_collections:
                     bl[idx].device_collections.append(playlist_map[lpath])
             else:
-                if ContentType == '6':
+                if ContentType == '6' and self.dbversion < 8:
                     book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576)
                 else:
+                    try:
                         book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID)
+                    except:
+                        debug_print("prefix: ", prefix, "lpath: ", lpath, "title: ", title, "authors: ", authors, \
+                                    "mime: ", mime, "date: ", date, "ContentType: ", ContentType, "ImageID: ", ImageID)
+                        raise
 
                 # print 'Update booklist'
                 book.device_collections = [playlist_map[lpath]] if lpath in playlist_map else []
 
@@ -143,6 +151,13 @@ class KOBO(USBMS):
        # numrows = row[0]
        #cursor.close()
 
+        # Determine the database version
+        # 4 - Bluetooth Kobo Rev 2 (1.4)
+        # 8 - WIFI KOBO Rev 1
+        cursor.execute('select version from dbversion')
+        result = cursor.fetchone()
+        self.dbversion = result[0]
+
        query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
            'ImageID, ReadStatus from content where BookID is Null'
 
@@ -153,7 +168,8 @@ class KOBO(USBMS):
            # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
 
            path = self.path_from_contentid(row[3], row[5], oncard)
-           mime = mime_type_ext(path_to_ext(row[3]))
+           mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip'
+           # debug_print("mime:", mime)
 
            if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
                changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7])
 
@@ -206,7 +222,7 @@ class KOBO(USBMS):
        cursor.close()
 
        cursor = connection.cursor()
-       if ContentType == 6:
+       if ContentType == 6 and self.dbversion < 8:
            # Delete the shortcover_pages first
            cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)
 
@@ -249,7 +265,7 @@ class KOBO(USBMS):
        path = self.normalize_path(path)
        # print "Delete file normalized path: " + path
        extension = os.path.splitext(path)[1]
-       ContentType = self.get_content_type_from_extension(extension)
+       ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(path)
 
        ContentID = self.contentid_from_path(path, ContentType)
 
@@ -332,9 +348,14 @@ class KOBO(USBMS):
 
    def contentid_from_path(self, path, ContentType):
        if ContentType == 6:
+           if self.dbversion < 8:
               ContentID = os.path.splitext(path)[0]
               # Remove the prefix on the file. it could be either
               ContentID = ContentID.replace(self._main_prefix, '')
+           else:
+               ContentID = path
+               ContentID = ContentID.replace(self._main_prefix + '.kobo/kepub/', '')
 
           if self._card_a_prefix is not None:
               ContentID = ContentID.replace(self._card_a_prefix, '')
       elif ContentType == 999: # HTML Files
 
@@ -350,6 +371,13 @@ class KOBO(USBMS):
           ContentID = ContentID.replace("\\", '/')
       return ContentID
 
+   def get_content_type_from_path(self, path):
+       # Strictly speaking the ContentType could be 6 or 10
+       # however newspapers have the same storage format
+       if path.find('kepub') >= 0:
+           ContentType = 6
+       return ContentType
+
    def get_content_type_from_extension(self, extension):
        if extension == '.kobo':
            # Kobo books do not have book files. They do have some images though
 
@@ -369,19 +397,22 @@ class KOBO(USBMS):
            print 'path from_contentid cardb'
        elif oncard == 'carda':
            path = path.replace("file:///mnt/sd/", self._card_a_prefix)
-           # print "SD Card: " + filename
+           # print "SD Card: " + path
        else:
-           if ContentType == "6":
+           if ContentType == "6" and self.dbversion < 8:
               # This is a hack as the kobo files do not exist
               # but the path is required to make a unique id
              # for calibre's reference
              path = self._main_prefix + path + '.kobo'
+             # print "Path: " + path
+          elif (ContentType == "6" or ContentType == "10") and self.dbversion >= 8:
+             path = self._main_prefix + '.kobo/kepub/' + path
+             # print "Internal: " + path
          else:
              # if path.startswith("file:///mnt/onboard/"):
              path = path.replace("file:///mnt/onboard/", self._main_prefix)
+             path = path.replace("/mnt/onboard/", self._main_prefix)
-             # print "Internal: " + filename
+             # print "Internal: " + path
 
       return path
 
@@ -469,7 +500,7 @@ class KOBO(USBMS):
           book.device_collections = ['Im_Reading']
 
           extension = os.path.splitext(book.path)[1]
-          ContentType = self.get_content_type_from_extension(extension)
+          ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
 
          ContentID = self.contentid_from_path(book.path, ContentType)
         datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
 
@@ -505,7 +536,7 @@ class KOBO(USBMS):
         book.device_collections = ['Read']
 
         extension = os.path.splitext(book.path)[1]
-        ContentType = self.get_content_type_from_extension(extension)
+        ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
 
        ContentID = self.contentid_from_path(book.path, ContentType)
        # datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
@@ -36,15 +36,15 @@ class N770(USBMS):
 
 class N810(N770):
     name = 'Nokia 810 Device Interface'
-    gui_name = 'Nokia 810'
-    description = _('Communicate with the Nokia 810 internet tablet.')
+    gui_name = 'Nokia 810/900'
+    description = _('Communicate with the Nokia 810/900 internet tablet.')
 
-    PRODUCT_ID = [0x96]
+    PRODUCT_ID = [0x96, 0x1c7]
     BCD = [0x316]
 
-    WINDOWS_MAIN_MEM = 'N810'
+    WINDOWS_MAIN_MEM = ['N810', 'N900']
 
-    MAIN_MEMORY_VOLUME_LABEL = 'N810 Main Memory'
+    MAIN_MEMORY_VOLUME_LABEL = 'Nokia Tablet Main Memory'
 
 class E71X(USBMS):
 
@@ -573,7 +573,10 @@ class XMLCache(object):
         ans = root.makeelement('{%s}text'%namespace, attrib=attrib,
                 nsmap=root.nsmap)
         ans.tail = '\n'
-        root[-1].tail = '\n' + '\t'
+        if len(root) > 0:
+            root[-1].tail = '\n\t'
+        else:
+            root.text = '\n\t'
         root.append(ans)
         if thumbnail and thumbnail[-1]:
             ans.text = '\n' + '\t\t'
@@ -14,7 +14,7 @@ from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.date import parse_date
 from calibre.utils.zipfile import ZipFile
-from calibre import extract, walk
+from calibre import extract, walk, isbytestring, filesystem_encoding
 from calibre.constants import __version__
 
 DEBUG_README=u'''
 
@@ -77,6 +77,10 @@ class Plumber(object):
         :param input: Path to input file.
         :param output: Path to output file/directory
         '''
+        if isbytestring(input):
+            input = input.decode(filesystem_encoding)
+        if isbytestring(output):
+            output = output.decode(filesystem_encoding)
         self.original_input_arg = input
         self.input = os.path.abspath(input)
         self.output = os.path.abspath(output)
|
@ -2043,12 +2043,16 @@ class MobiWriter(object):
|
||||
else :
|
||||
self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
|
||||
|
||||
if True:
|
||||
# Apparently the CTOC must end with a null byte
|
||||
self._ctoc.write('\0')
|
||||
|
||||
ctoc = self._ctoc.getvalue()
|
||||
rec_count = len(self._ctoc_records)
|
||||
self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
|
||||
(rec_count + 1, 'records, last record' if rec_count else 'record,', len(self._ctoc.getvalue())/655) )
|
||||
(rec_count + 1, 'records, last record' if rec_count else 'record,',
|
||||
len(ctoc)/655) )
|
||||
|
||||
return align_block(self._ctoc.getvalue())
|
||||
return align_block(ctoc)
|
||||
|
||||
def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
|
||||
pos = 0xc0 + indxt.tell()
|
||||
|
@ -25,6 +25,7 @@ from calibre.translations.dynamic import translate
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
||||
from calibre import isbytestring
|
||||
|
||||
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
|
||||
|
||||
@ -404,7 +405,8 @@ class DirContainer(object):
|
||||
|
||||
def __init__(self, path, log):
|
||||
self.log = log
|
||||
path = unicode(path)
|
||||
if isbytestring(path):
|
||||
path = path.decode(filesystem_encoding)
|
||||
ext = os.path.splitext(path)[1].lower()
|
||||
if ext == '.opf':
|
||||
self.opfname = os.path.basename(path)
|
||||
|
@ -6,8 +6,6 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import cStringIO
|
||||
|
||||
from calibre import fit_image
|
||||
|
||||
class RescaleImages(object):
|
||||
@ -19,13 +17,7 @@ class RescaleImages(object):
|
||||
self.rescale(qt=is_ok_to_use_qt())
|
||||
|
||||
def rescale(self, qt=True):
|
||||
from PyQt4.Qt import QImage, Qt
|
||||
from calibre.gui2 import pixmap_to_data
|
||||
try:
|
||||
from PIL import Image as PILImage
|
||||
PILImage
|
||||
except ImportError:
|
||||
import Image as PILImage
|
||||
from calibre.utils.magick.draw import Image
|
||||
|
||||
is_image_collection = getattr(self.opts, 'is_image_collection', False)
|
||||
|
||||
@ -35,6 +27,7 @@ class RescaleImages(object):
|
||||
page_width, page_height = self.opts.dest.width, self.opts.dest.height
|
||||
page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72.
|
||||
page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72.
|
||||
|
||||
for item in self.oeb.manifest:
|
||||
if item.media_type.startswith('image'):
|
||||
ext = item.media_type.split('/')[-1].upper()
|
||||
@ -44,42 +37,25 @@ class RescaleImages(object):
|
||||
|
||||
raw = item.data
|
||||
if not raw: continue
|
||||
if qt:
|
||||
img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
|
||||
try:
|
||||
if not img.loadFromData(raw): continue
|
||||
img = Image()
|
||||
img.load(raw)
|
||||
except:
|
||||
continue
|
||||
width, height = img.width(), img.height()
|
||||
else:
|
||||
f = cStringIO.StringIO(raw)
|
||||
try:
|
||||
im = PILImage.open(f)
|
||||
except IOError:
|
||||
continue
|
||||
width, height = im.size
|
||||
|
||||
width, height = img.size
|
||||
|
||||
|
||||
scaled, new_width, new_height = fit_image(width, height,
|
||||
page_width, page_height)
|
||||
if scaled:
|
||||
data = None
|
||||
self.log('Rescaling image from %dx%d to %dx%d'%(
|
||||
width, height, new_width, new_height), item.href)
|
||||
if qt:
|
||||
img = img.scaled(new_width, new_height,
|
||||
Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
|
||||
data = pixmap_to_data(img, format=ext)
|
||||
else:
|
||||
try:
|
||||
im = im.resize((int(new_width), int(new_height)), PILImage.ANTIALIAS)
|
||||
of = cStringIO.StringIO()
|
||||
im.convert('RGB').save(of, ext)
|
||||
data = of.getvalue()
|
||||
img.size = (new_width, new_height)
|
||||
data = img.export(ext.lower())
|
||||
except:
|
||||
self.log.exception('Failed to rescale image')
|
||||
if data is not None:
|
||||
else:
|
||||
item.data = data
|
||||
item.unload_data_from_memory()
|
||||
|
||||
|
@@ -9,7 +9,6 @@ from PyQt4.Qt import Qt
 
 from calibre.gui2 import Dispatcher
 from calibre.gui2.tools import fetch_scheduled_recipe
-from calibre.utils.config import dynamic
 from calibre.gui2.actions import InterfaceAction
 
 class FetchNewsAction(InterfaceAction):
 
@@ -60,9 +59,9 @@ class FetchNewsAction(InterfaceAction):
             return self.gui.job_exception(job)
         id = self.gui.library_view.model().add_news(pt.name, arg)
         self.gui.library_view.model().reset()
-        sync = dynamic.get('news_to_be_synced', set([]))
+        sync = self.gui.news_to_be_synced
         sync.add(id)
-        dynamic.set('news_to_be_synced', sync)
+        self.gui.news_to_be_synced = sync
         self.scheduler.recipe_downloaded(arg)
         self.gui.status_bar.show_message(arg['title'] + _(' fetched.'), 3000)
         self.gui.email_news(id)
@@ -1102,12 +1102,35 @@ class DeviceMixin(object): # {{{
         self.status_bar.show_message(_('Sending catalogs to device.'), 5000)
 
 
+    @dynamic_property
+    def news_to_be_synced(self):
+        doc = 'Set of ids to be sent to device'
+        def fget(self):
+            ans = []
+            try:
+                ans = self.library_view.model().db.prefs.get('news_to_be_synced',
+                        [])
+            except:
+                import traceback
+                traceback.print_exc()
+            return set(ans)
+
+        def fset(self, ids):
+            try:
+                self.library_view.model().db.prefs.set('news_to_be_synced',
+                        list(ids))
+            except:
+                import traceback
+                traceback.print_exc()
+
+        return property(fget=fget, fset=fset, doc=doc)
+
+
     def sync_news(self, send_ids=None, do_auto_convert=True):
         if self.device_connected:
             del_on_upload = config['delete_news_from_library_on_upload']
             settings = self.device_manager.device.settings()
-            ids = list(dynamic.get('news_to_be_synced', set([]))) if send_ids is None else send_ids
+            ids = list(self.news_to_be_synced) if send_ids is None else send_ids
             ids = [id for id in ids if self.library_view.model().db.has_id(id)]
             files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
                 ids, settings.format_map,
 
@@ -1139,7 +1162,7 @@ class DeviceMixin(object): # {{{
             for f in files:
                 f.deleted_after_upload = del_on_upload
             if not files:
-                dynamic.set('news_to_be_synced', set([]))
+                self.news_to_be_synced = set([])
                 return
             metadata = self.library_view.model().metadata_for(ids)
             names = []
 
@@ -1153,7 +1176,7 @@ class DeviceMixin(object): # {{{
                 if mi.cover and os.access(mi.cover, os.R_OK):
                     mi.thumbnail = self.cover_to_thumbnail(open(mi.cover,
                         'rb').read())
-            dynamic.set('news_to_be_synced', set([]))
+            self.news_to_be_synced = set([])
             if config['upload_news_to_device'] and files:
                 remove = ids if del_on_upload else []
                 space = { self.location_manager.free[0] : None,
 
@@ -1347,6 +1370,7 @@ class DeviceMixin(object): # {{{
         # If it does not, then do it here.
         if not self.set_books_in_library(self.booklists(), reset=True):
             self.upload_booklists()
+        with self.library_view.preserve_selected_books:
             self.book_on_device(None, reset=True)
         self.refresh_ondevice()
 
@@ -90,10 +90,15 @@ class BookInfo(QDialog, Ui_BookInfo):
         row = row.row()
         if row == self.current_row:
             return
+        info = self.view.model().get_book_info(row)
+        if info is None:
+            # Indicates books was deleted from library, or row numbers have
+            # changed
+            return
+
         self.previous_button.setEnabled(False if row == 0 else True)
         self.next_button.setEnabled(False if row == self.view.model().rowCount(QModelIndex())-1 else True)
         self.current_row = row
-        info = self.view.model().get_book_info(row)
         self.setWindowTitle(info[_('Title')])
         self.title.setText('<b>'+info.pop(_('Title')))
         comments = info.pop(_('Comments'), '')
@@ -374,6 +374,8 @@ class BooksModel(QAbstractTableModel): # {{{
         if isinstance(index, int):
             index = self.index(index, 0)
         data = self.current_changed(index, None, False)
+        if data is None:
+            return data
         row = index.row()
         data[_('Title')] = self.db.title(row)
         au = self.db.authors(row)
@@ -22,6 +22,26 @@ from calibre.gui2.library import DEFAULT_SORT
 from calibre.constants import filesystem_encoding
 from calibre import force_unicode
 
+class PreserveSelection(object): # {{{
+
+    '''
+    Save the set of selected books at enter time. If at exit time there are no
+    selected books, restore the previous selection.
+    '''
+
+    def __init__(self, view):
+        self.view = view
+        self.selected_ids = []
+
+    def __enter__(self):
+        self.selected_ids = self.view.get_selected_ids()
+
+    def __exit__(self, *args):
+        current = self.view.get_selected_ids()
+        if not current:
+            self.view.select_rows(self.selected_ids, using_ids=True)
+# }}}
+
 class BooksView(QTableView): # {{{
 
     files_dropped = pyqtSignal(object)
 
@@ -58,6 +78,7 @@ class BooksView(QTableView): # {{{
         self.setSelectionBehavior(QAbstractItemView.SelectRows)
         self.setSortingEnabled(True)
         self.selectionModel().currentRowChanged.connect(self._model.current_changed)
+        self.preserve_selected_books = PreserveSelection(self)
 
         # {{{ Column Header setup
         self.can_add_columns = True
 
@@ -613,6 +634,16 @@ class BooksView(QTableView): # {{{
             sel.select(m.index(row, 0), m.index(row, max_col))
         sm.select(sel, sm.ClearAndSelect)
 
+    def get_selected_ids(self):
+        ans = []
+        m = self.model()
+        for idx in self.selectedIndexes():
+            r = idx.row()
+            i = m.id(r)
+            if i not in ans:
+                ans.append(i)
+        return ans
+
     def close(self):
         self._model.close()
 
@@ -716,6 +716,9 @@ View an ebook.
 
 
 def main(args=sys.argv):
+    # Ensure viewer can continue to function if GUI is closed
+    os.environ.pop('CALIBRE_WORKER_TEMP_DIR', None)
+
     parser = option_parser()
     opts, args = parser.parse_args(args)
     pid = os.fork() if False and (islinux or isfreebsd) else -1
@@ -16,7 +16,7 @@ from PyQt4.Qt import QIcon, QFont, QLabel, QListWidget, QAction, \
     QTimer, QRect
 
 from calibre.gui2 import NONE, error_dialog, pixmap_to_data, gprefs
-
+from calibre.constants import isosx
 from calibre.gui2.filename_pattern_ui import Ui_Form
 from calibre import fit_image
 from calibre.utils.fonts import fontconfig
 
@@ -303,7 +303,8 @@ class FontFamilyModel(QAbstractListModel):
             return NONE
         if role == Qt.DisplayRole:
             return QVariant(family)
-        if role == Qt.FontRole:
+        if not isosx and role == Qt.FontRole:
+            # Causes a Qt crash with some fonts on OS X
             return QVariant(QFont(family))
         return NONE
 
@@ -380,7 +380,7 @@ class ResultCache(SearchQueryParser): # {{{
             field_count = 3
         else:
             try:
-                qd = parse_date(query)
+                qd = parse_date(query, as_utc=False)
             except:
                 raise ParseException(query, len(query), 'Date conversion error', self)
             if '-' in query:
@@ -509,7 +509,7 @@ class BrowseServer(object):
 
         hide_sort = 'true' if dt == 'series' else 'false'
         if category == 'search':
-            which = unhexlify(cid)
+            which = unhexlify(cid).decode('utf-8')
             try:
                 ids = self.search_cache('search:"%s"'%which)
             except:
@@ -124,7 +124,7 @@ class ContentServer(object):
         if want_mobile:
             return self.mobile()
 
-        return self.static('index.html')
+        return self.browse_toplevel()
 
     def old(self, **kwargs):
         return self.static('index.html')
@@ -435,3 +435,35 @@ And since I'm sure someone will ask: The reason adding/saving books are in separ
 
 Finally, the reason calibre keep workers alive and idle instead of launching on demand is to workaround the slow startup time of python processes.
 
+How do I run parts of |app| like news download and the content server on my own linux server?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First, you must install |app| onto your linux server. If your server is using a modern linux distro, you should have no problems installing |app| onto it.
+
+.. note::
+    If you bought into the notion that a real server must run a decade old version of Debian, then you will have to jump through a few hoops. First, compile a newer version of glibc (>= 2.10) on your server from source. Then get the |app| linux binary tarball from the |app| google code page for your server architecture. Extract it into :file:`/opt/calibre`. Put your previously compiled glibc into :file:`/opt/calibre` as :file:`libc.so.6`. You can now run the calibre binaries from :file:`/opt/calibre`.
+
+You can run the |app| server via the command::
+
+    /opt/calibre/calibre-server --with-library /path/to/the/library/you/want/to/share
+
+You can download news and convert it into an ebook with the command::
+
+    /opt/calibre/ebook-convert "Title of news source.recipe" outputfile.epub
+
+If you want to generate MOBI, use outputfile.mobi instead.
+
+You can email downloaded news with the command::
+
+    /opt/calibre/calibre-smtp
+
+I leave figuring out the exact command line as an exercise for the reader.
+
+Finally, you can add downloaded news to the |app| library with::
+
+    /opt/calibre/calibredb add --with-library /path/to/library outfile.epub
+
+Remember to read the command line documentation section of the |app| User Manual to learn more about these, and other commands.
+
+.. note:: Some parts of calibre require a X server. If you're lucky, nothing you do will fall into this category, if not, you will have to look into using xvfb.
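As a hedged sketch of the calibre-smtp exercise above, the full command might look like the following; the relay host, credentials, and addresses are placeholder assumptions, not values from this commit, so check /opt/calibre/calibre-smtp --help for the authoritative options:

    /opt/calibre/calibre-smtp --relay smtp.example.com --port 587 \
        --username user --password secret --encryption-method TLS \
        --subject "Daily news" --attachment outputfile.epub \
        from@example.com to@example.com "News attached"  # all values are placeholders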
@@ -7,7 +7,7 @@ being closed.
 """
 import tempfile, os, atexit, binascii, cPickle
 
-from calibre import __version__, __appname__
+from calibre.constants import __version__, __appname__
 
 def cleanup(path):
     try:
@@ -105,7 +105,7 @@ def main():
     notifier.start()
 
     result = func(*args, **kwargs)
-    if result is not None:
+    if result is not None and os.path.exists(os.path.dirname(resultf)):
         cPickle.dump(result, open(resultf, 'wb'), -1)
 
     notifier.queue.put(None)
@@ -9,6 +9,7 @@ import os
 
 from calibre.utils.magick import Image, DrawingWand, create_canvas
 from calibre.constants import __appname__, __version__
+from calibre.utils.config import tweaks
 from calibre import fit_image
 
 def normalize_format_name(fmt):
 
@@ -112,6 +113,8 @@ def add_borders_to_image(img_data, left=0, top=0, right=0, bottom=0,
     return canvas.export(fmt)
 
 def create_text_wand(font_size, font_path=None):
+    if font_path is None:
+        font_path = tweaks['generate_cover_title_font']
     if font_path is None:
         font_path = P('fonts/liberation/LiberationSerif-Bold.ttf')
     ans = DrawingWand()
 
@@ -203,8 +206,11 @@ def create_cover_page(top_lines, logo_path, width=590, height=750,
         bottom += line.bottom_margin
     bottom -= top_lines[-1].bottom_margin
 
+    foot_font = tweaks['generate_cover_foot_font']
+    if not foot_font:
+        foot_font = P('fonts/liberation/LiberationMono-Regular.ttf')
     vanity = create_text_arc(__appname__ + ' ' + __version__, 24,
-        font=P('fonts/liberation/LiberationMono-Regular.ttf'))
+        font=foot_font)
     lwidth, lheight = vanity.size
     left = int(max(0, (width - lwidth)/2.))
     top = height - lheight - 10
@@ -842,6 +842,9 @@ class BasicNewsRecipe(Recipe):
         except NotImplementedError:
             feeds = self.parse_feeds()
 
+        if not feeds:
+            raise ValueError('No articles found, aborting')
+
         #feeds = FeedCollection(feeds)
 
         self.report_progress(0, _('Trying to download cover...'))