merge from trunk
commit 74c85a9749
@@ -1,13 +1,13 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import browser
 import re
-import mechanize

 class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     title = u'The Daily Mirror'
-    description = 'News as provide by The Daily Mirror -UK'
+    description = 'News as provided by The Daily Mirror -UK'

     __author__ = 'Dave Asbury'
-    # last updated 7/4/12
+    # last updated 28/4/12
     language = 'en_GB'
     #cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@@ -15,89 +15,80 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):

     oldest_article = 1
-    max_articles_per_feed = 10
+    max_articles_per_feed = 12
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
-    auto_cleanup = True
+    # auto_cleanup = True
     #conversion_options = { 'linearize_tables' : True }

-    #keep_only_tags = [
-    # dict(name='h1'),
-    # dict(name='div',attrs={'id' : 'body-content'}),
-    #dict(name='div',atts={'class' : 'article-body'}),
+    keep_only_tags = [ dict(name='h1'),
+        dict(name='div',attrs={'class' : 'lead-text'}),
+        dict(name='div',attrs={'class' : 'styleGroup clearfix'}),
+        dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}),
+        dict(name='figure',attrs={'class' : 'clearfix'}),
+        dict(name='div',attrs={'class' :'body '}),

         #dict(attrs={'class' : ['article-attr','byline append-1','published']}),
         #dict(name='p'),
-    # ]
-
-    #remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
-
-    remove_tags = [
-        dict(name='title'),
-        dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
-        # dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
-        #dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
-        #dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
         ]

-    # preprocess_regexps = [
-    #(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
+    remove_tags = [
+        dict(attrs={'class' : 'comment'}),
+        dict(name='title'),
+        dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}),
+        dict(name='ul',attrs={'id' : 'login-201109171215'}),
+        dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}), #'widget navigation breadcrumb widget-editable viziwyg-section-198 inpage-widget-80721 span-17','image-credit'
+        ]

     preprocess_regexps = [
         (re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]

-    preprocess_regexps = [
-        (re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]
-
-    #preprocess_regexps = [
-    #(re.compile(r'Sponsored Links', re.IGNORECASE | re.DOTALL), lambda match: '')]

     feeds = [
-        (u'UK News', u'http://feed43.com/0287771688643868.xml')
-        ,(u'Tech News', u'http://feed43.com/2455520588350501.xml')
-        ,(u'Weird World','http://feed43.com/0863800333634654.xml')
-        ,(u'Sport','http://feed43.com/7713243036546130.xml')
-        ,(u'Sport : Boxing ','http://feed43.com/0414732220804255.xml')
-        ,(u'Sport : Rugby Union','http://feed43.com/4710138762362383.xml')
-        ,(u'Sport : Other','http://feed43.com/4501416886323415.xml')
-        ,(u'TV and Film','http://feed43.com/5238302853765104.xml')
-        ,(u'Celebs','http://feed43.com/8770061048844683.xml')
-        ,(u'Life Style : Family','http://feed43.com/4356170742410338.xml')
-        ,(u'Travel','http://feed43.com/1436576006476607.xml')
+        (u'News',u'http://www.mirror.co.uk/news/rss.xml'),
+        (u'Sports',u'http://www.mirror.co.uk/sport/rss.xml'),
+        (u'3AM',u'http://www.mirror.co.uk/3am/rss.xml'),
+        (u'Lifestyle',u'http://www.mirror.co.uk/lifestyle/rss.xml')

         # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
         ]

     extra_css = '''
+        h1{ font-size:medium;}
         body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
-        h1{ font-size:18px;}
         img { display:block}
-    '''
+    '''#

     def get_cover_url(self):
         soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
         # look for the block containing the mirror button and url
         cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_92.gif);'})
         cov2 = str(cov)
         cov2='http://www.politicshome.com'+cov2[9:-142]
         #cov2 now contains url of the page containing pic
         soup = self.index_to_soup(cov2)
         cov = soup.find(attrs={'id' : 'large'})
         cov2 = str(cov)
         cov2=cov2[27:-18]
         #cov2 now is pic url, now go back to original function
-        br = mechanize.Browser()
+        br = browser()
         br.set_handle_redirect(False)
         try:
             br.open_novisit(cov2)
             cover_url = cov2
         except:
-            cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
+            cover_url ='http://profile.ak.fbcdn.net/hprofile-ak-snc4/373019_6149699161_1710984811_n.jpg'

+        # print '******** string is ', cov2,' ***'
         #cover_url = cov2
         #cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
         return cover_url
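Note: both this recipe and The Sun UK recipe further down replace mechanize.Browser() with calibre's browser() helper, which, to the best of my knowledge, returns a mechanize browser preconfigured with a sensible user agent and cookie handling. A minimal sketch of the probe-and-fall-back pattern that get_cover_url uses, with placeholder URLs standing in for the scraped values:

    from calibre import browser

    candidate_url = 'http://example.com/todays-cover.jpg'   # placeholder for the scraped url
    fallback_url = 'http://example.com/static-cover.jpg'    # placeholder for the stock image

    br = browser()                    # preconfigured mechanize.Browser()
    br.set_handle_redirect(False)     # a redirect here usually means "no such cover"
    try:
        br.open_novisit(candidate_url)    # probe without recording browser history
        cover_url = candidate_url
    except:
        cover_url = fallback_url          # known-good static image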
recipes/el_mundo_today.recipe (new file, +21)
@@ -0,0 +1,21 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElMundoTodayRecipe(BasicNewsRecipe):
+    title          = 'El Mundo Today'
+    __author__     = 'atordo'
+    description    = u'La actualidad del mañana'
+    category       = 'Noticias, humor'
+    cover_url      = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png'
+    oldest_article = 30
+    max_articles_per_feed = 30
+    auto_cleanup   = True
+    no_stylesheets = True
+    language       = 'es'
+    use_embedded_content = True
+
+    feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        br.set_handle_gzip(True)
+        return br
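Note: set_handle_gzip is mechanize's opt-in for compressed responses (it is off by default), so the browser override lets the recipe fetch from a server that always gzips its feed. Minimal sketch against plain mechanize, assuming calibre's bundled copy:

    import mechanize

    br = mechanize.Browser()
    br.set_handle_gzip(True)   # accept and transparently decode Content-Encoding: gzip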
@ -18,16 +18,21 @@ __IncludeThumbnails__ = True
|
|||||||
__UseLife__ = True
|
__UseLife__ = True
|
||||||
# (HK only) It is to disable premium content (Default: False)
|
# (HK only) It is to disable premium content (Default: False)
|
||||||
__InclPremium__ = False
|
__InclPremium__ = False
|
||||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
|
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: False)
|
||||||
__ParsePFF__ = True
|
__ParsePF__ = False
|
||||||
|
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with text formats (Default: True) -- override __ParsePF__
|
||||||
|
__ParseTxt__ = True
|
||||||
|
# (HK only) Use mobile text version for some articles (Default: False)
|
||||||
|
__ParseSelectedMobile__ = False
|
||||||
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
||||||
__HiResImg__ = False
|
__HiResImg__ = False
|
||||||
# Override the date returned by the program if specifying a YYYYMMDD below
|
# Override the date returned by the program if specifying a YYYYMMDD below (not work if __ParseSelectedMobile__ is True and __UseLife__ is False)
|
||||||
__Date__ = ''
|
__Date__ = ''
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2012/04/24: improved parsing of news.mingpao.com content
|
||||||
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
||||||
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
||||||
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
|
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
|
||||||
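Note: per the comments above, __ParseTxt__ overrides __ParsePF__, and plain web HTML is the fallback when both are off. My reading of the flag precedence, as a self-contained sketch (this helper does not exist in the recipe):

    def choose_source(parse_txt, parse_pf):
        # __ParseTxt__ overrides __ParsePF__; plain web HTML is the fallback
        if parse_txt:
            return 'text (.txt) version'
        if parse_pf:
            return 'printer-friendly version'
        return 'web html version'

    print(choose_source(True, False))   # the new defaults -> 'text (.txt) version'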
@@ -81,6 +86,7 @@ class MPRecipe(BasicNewsRecipe):
     category = 'Chinese, News, Hong Kong'
     extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
+    remove_tags_before = dict(name='font', attrs={'color':['navy']})
     keep_only_tags = [dict(name='h1'),
                       dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                       dict(name='font', attrs={'color':['AA0000']}), # for column articles title
@@ -91,13 +97,17 @@ class MPRecipe(BasicNewsRecipe):
                       dict(attrs={'class':['photo']}),
                       dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
                       dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
-                      dict(attrs={'class':['images']}) # for images from txt
+                      dict(attrs={'class':['images']}), # for images from txt
+                      dict(name='table', attrs={'width':['100%'], 'cellspacing':['0'], 'cellpadding':['0'], 'border':['0']}) # content table in pda site
                       ]
     if __KeepImages__:
         remove_tags = [dict(name='style'),
                        dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
                        dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
+                       dict(name='img', attrs={'alt':["明報網站", "按此列印", "關閉本視窗"]}), # non-article images in life.mingpao.com article
+                       dict(name='img', attrs={'src':["../image/top_2.gif"]})
                        #dict(name='table') # for content fetched from life.mingpao.com
+                       #dict(name='table', attrs={'width':['98%'], 'cellspacing':['0'], 'cellpadding':['0'], 'border':['0']})
                        ]
     else:
         remove_tags = [dict(name='style'),
@@ -105,6 +115,7 @@ class MPRecipe(BasicNewsRecipe):
                        dict(name='font', attrs={'size':['2'], 'color':['666666']}), # article date in life.mingpao.com article
                        dict(name='img'),
                        #dict(name='table') # for content fetched from life.mingpao.com
+                       #dict(name='table', attrs={'width':['98%'], 'cellspacing':['0'], 'cellpadding':['0'], 'border':['0']})
                        ]
     remove_attributes = ['width']
     preprocess_regexps = [
@@ -118,7 +129,15 @@ class MPRecipe(BasicNewsRecipe):
         (re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
          lambda match: "<div id='newscontent'>"),
         (re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
-         lambda match: "</b>")
+         lambda match: "</b>"),
+        (re.compile(r'<br><br><img src="http://pda.mingpao.com/image/shim.gif" width=11><br>', re.DOTALL|re.IGNORECASE),
+         lambda match: ''),
+        (re.compile(r'<img src="http://pda.mingpao.com/image/mbup.gif" border=0>', re.DOTALL|re.IGNORECASE),
+         lambda match: ''),
+        (re.compile(r'<img src="http://pda.mingpao.com/image/mbun.gif" border=0>', re.DOTALL|re.IGNORECASE),
+         lambda match: ''),
+        #(re.compile(r'[<a href="HotNews1.cfm.+?">.+?</a>]', re.DOTALL|re.IGNORECASE),
+        #lambda match: '')
     ]
 elif __Region__ == 'Vancouver':
     if __UseChineseTitle__ == True:
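Note: the three new substitutions strip pda.mingpao.com spacer and navigation images before the HTML is parsed. A quick self-contained check of the first pattern against a made-up snippet:

    import re

    html = 'body text<br><br><img src="http://pda.mingpao.com/image/shim.gif" width=11><br>more text'
    pat = re.compile(r'<br><br><img src="http://pda.mingpao.com/image/shim.gif" width=11><br>',
                     re.DOTALL | re.IGNORECASE)
    print(pat.sub('', html))   # -> body textmore text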
@@ -222,6 +241,10 @@ class MPRecipe(BasicNewsRecipe):
         else:
             return self.get_dtlocal().strftime("%d")

+    # Note: does not work with custom date given by __Date__
+    def get_weekday(self):
+        return self.get_dtlocal().weekday()
+
     def get_cover_url(self):
         if __Region__ == 'Hong Kong':
             cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
@@ -260,7 +283,23 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))

-            if __InclPremium__ == True:
+            # if __InclPremium__ == True:
+            #     # parse column section articles directly from .txt files
+            #     for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+            #                               ]:
+            #         articles = self.parse_section2_txt(url, keystr)
+            #         if articles:
+            #             feeds.append((title, articles))
+            #
+            #     for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+            #                        (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
+            #         articles = self.parse_section(url)
+            #         if articles:
+            #             feeds.append((title, articles))
+
+            # new
+            if __InclPremium__ == True and (self.get_weekday() <> 6 or __ParseSelectedMobile__ == False):
+                # if both not on Sunday and not __ParseSelectedMobile__, go ahead
                 # parse column section articles directly from .txt files
                 for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                            ]:
@@ -268,17 +307,45 @@ class MPRecipe(BasicNewsRecipe):
                     if articles:
                         feeds.append((title, articles))

-            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
-                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
-                articles = self.parse_section(url)
-                if articles:
-                    feeds.append((title, articles))
-        else:
-            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
-                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
-                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
-                articles = self.parse_section(url)
-                if articles:
-                    feeds.append((title, articles))
+            if __InclPremium__ == False or self.get_weekday() <> 6:
+                for title, url, seckey in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm', 'ja')]:
+                    if __ParseTxt__ == False:
+                        articles = self.parse_section(url)
+                    else:
+                        articles = self.parse_section_txt(url, seckey)
+                    if articles:
+                        feeds.append((title, articles))
+            else:
+                if __InclPremium__ == True and __ParseSelectedMobile__ == True:
+                    articles = self.parse_section_mobile('http://premium.mingpao.com/pda/palm', 'HotNews1.cfm?cat=ja&token=b218bc260b89c0&online=1')
+                    if articles:
+                        feeds.append((u'\u526f\u520a Supplement', articles))
+                else:
+                    for title, url, seckey in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm', 'ja')]:
+                        if __ParseTxt__ == False:
+                            articles = self.parse_section(url)
+                        else:
+                            articles = self.parse_section_txt(url, seckey)
+                        if articles:
+                            feeds.append((title, articles))
+
+            for title, url, seckey in [(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm', 'em')]:
+                if __ParseTxt__ == False:
+                    articles = self.parse_section(url)
+                else:
+                    articles = self.parse_section_txt(url, seckey)
+                if articles:
+                    feeds.append((title, articles))
+            # end of new
+        else:
+            for title, url, seckey in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm', 'ga'),
+                                       (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm', 'gb'),
+                                       (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm', 'gf'),
+                                       (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm', 'mr')]:
+                if __ParseTxt__ == False:
+                    articles = self.parse_section(url)
+                else:
+                    articles = self.parse_section_txt(url, seckey)
                 if articles:
                     feeds.append((title, articles))

@@ -287,10 +354,13 @@ class MPRecipe(BasicNewsRecipe):
             #if ed_articles:
             #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

-            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
-                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
-                               (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
-                articles = self.parse_section(url)
+            for title, url, seckey in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm', 'fa'),
+                                       (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm', 'ca'),
+                                       (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm', 'ta')]:
+                if __ParseTxt__ == False:
+                    articles = self.parse_section(url)
+                else:
+                    articles = self.parse_section_txt(url, seckey)
                 if articles:
                     feeds.append((title, articles))
@@ -322,7 +392,9 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))

-            if __InclPremium__ == True:
+            if __InclPremium__ == True and (self.get_weekday() <> 6 or __ParseSelectedMobile__ == False):
+                # if both not on Sunday or not __ParseSelectedMobile__, go ahead
                 # parse column section articles directly from .txt files
                 for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                            ]:
@@ -330,9 +402,33 @@ class MPRecipe(BasicNewsRecipe):
                     if articles:
                         feeds.append((title, articles))

-            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
-                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
-                articles = self.parse_section(url)
-                if articles:
-                    feeds.append((title, articles))
+            if __InclPremium__ == False or self.get_weekday() <> 6:
+                for title, url, seckey in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm', 'ja')]:
+                    if __ParseTxt__ == False:
+                        articles = self.parse_section(url)
+                    else:
+                        articles = self.parse_section_txt(url, seckey)
+                    if articles:
+                        feeds.append((title, articles))
+            else:
+                if __InclPremium__ == True and __ParseSelectedMobile__ == True:
+                    articles = self.parse_section_mobile('http://premium.mingpao.com/pda/palm', 'HotNews1.cfm?cat=ja&token=b218bc260b89c0&online=1')
+                    if articles:
+                        feeds.append((u'\u526f\u520a Supplement', articles))
+                else:
+                    for title, url, seckey in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm', 'ja')]:
+                        if __ParseTxt__ == False:
+                            articles = self.parse_section(url)
+                        else:
+                            articles = self.parse_section_txt(url, seckey)
+                        if articles:
+                            feeds.append((title, articles))
+
+            for title, url, seckey in [(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm', 'em')]:
+                if __ParseTxt__ == False:
+                    articles = self.parse_section(url)
+                else:
+                    articles = self.parse_section_txt(url, seckey)
                 if articles:
                     feeds.append((title, articles))

@@ -366,7 +462,7 @@ class MPRecipe(BasicNewsRecipe):
                 feeds.append((title, articles))
         return feeds

-    # parse from news.mingpao.com
+    # parse from news.mingpao.com (web html)
     def parse_section(self, url):
         dateStr = self.get_fetchdate()
         soup = self.index_to_soup(url)
@@ -379,17 +475,57 @@ class MPRecipe(BasicNewsRecipe):
             title = self.tag_to_string(a)
             url = a.get('href', False)
             url = 'http://news.mingpao.com/' + dateStr + '/' +url
-            # replace the url to the print-friendly version
-            if __ParsePFF__ == True:
+            # replace the url to the alternative version
+            if __ParsePF__ == True:
+                # printer-friendly option
                 if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
                     url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                     url = re.sub('%2F.*%2F', '/', url)
+                if __InclPremium__ == True:
                     title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
                     url = url.replace('%2Etxt', '_print.htm')
                     url = url.replace('%5F', '_')
                 else:
                     url = url.replace('.htm', '_print.htm')
-            if url not in included_urls and url.rfind('Redirect') == -1:
+            #if url not in included_urls and url.rfind('Redirect') == -1 and (__InclPremium__ == False or title.rfind(u'\u6536\u8cbb\u5167\u5bb9') == -1):
+            if url not in included_urls and (__InclPremium__ == True or title.rfind(u'\u6536\u8cbb\u5167\u5bb9') == -1):
                 current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                 included_urls.append(url)
+        current_articles.reverse()
+        return current_articles
+
+    # parse from news.mingpao.com (txt)
+    def parse_section_txt(self, url, ch):
+        dateStr = self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
+        current_articles = []
+        included_urls = []
+        divs.reverse()
+        for i in divs:
+            a = i.find('a', href = True)
+            title = self.tag_to_string(a)
+            url = a.get('href', False)
+            #print 'Base url: ', url
+            # replace the url to the alternative version
+            # text version
+            if url.rfind('Redirect') <> -1:
+                url = 'http://news.mingpao.com/' + dateStr + '/' +url
+                #print 'original url: ', url
+                url = re.sub(dateStr + '/../cfm/Redirect.cfm.*NewsFile=', 'ftp/WebNews2/', url)
+                url = re.sub('%2F', '/', url)
+                if __InclPremium__ == True:
+                    title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
+                url = url.replace('%2Etxt', '.txt')
+                url = url.replace('%5F', '_')
+            else:
+                # get the first two char in url as ch
+                seckey = url[0:2]
+                url = url.replace('.htm', '.txt')
+                url = 'http://news.mingpao.com/ftp/WebNews2/' + dateStr + '/' + ch + '/' + seckey + '/' + url
+            #print 'updated url: ', url
+            if url not in included_urls and (__InclPremium__ == True or title.rfind(u'\u6536\u8cbb\u5167\u5bb9') == -1):
+            #if url not in included_urls and (url.rfind('Redirect') == -1) and (__InclPremium__ == False or title.rfind(u'\u6536\u8cbb\u5167\u5bb9') == -1):
+                current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
+                included_urls.append(url)
         current_articles.reverse()
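Note: the heart of the new parse_section_txt is rewriting a Redirect.cfm link into a direct path under ftp/WebNews2/. A self-contained run on a hypothetical link of that shape (the exact query format is an assumption for illustration):

    import re

    dateStr = '20120429'
    url = dateStr + '/ga/cfm/Redirect.cfm?NewsFile=gaa1%2Etxt'
    url = re.sub(dateStr + '/../cfm/Redirect.cfm.*NewsFile=', 'ftp/WebNews2/', url)
    url = re.sub('%2F', '/', url)          # decode any escaped slashes
    url = url.replace('%2Etxt', '.txt')    # decode the escaped extension
    print(url)   # -> ftp/WebNews2/gaa1.txt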
@@ -437,6 +573,20 @@ class MPRecipe(BasicNewsRecipe):
         current_articles.reverse()
         return current_articles

+    # parse from mobile version
+    def parse_section_mobile(self, base, page):
+        soup = self.index_to_soup(base + '/' + page)
+        a = soup.findAll('a', href=True)
+        current_articles = []
+        included_urls = []
+        for i in a:
+            title = self.tag_to_string(i)
+            url = i.get('href', False)
+            if url not in included_urls and url.rfind('HotNews2.cfm') <> -1:
+                current_articles.append({'title': title, 'url': base + '/' + url, 'description': ''})
+                included_urls.append(url)
+        return current_articles
+
     # parse from www.mingpaovan.com
     def parse_section3(self, url, baseUrl):
         self.get_fetchdate()
||||||
@ -631,6 +781,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
||||||
new_html = new_html.replace(img, gifimg)
|
new_html = new_html.replace(img, gifimg)
|
||||||
except:
|
except:
|
||||||
|
if __ParseTxt__ == False:
|
||||||
# find the location of the first _
|
# find the location of the first _
|
||||||
pos = img.find('_')
|
pos = img.find('_')
|
||||||
if pos > -1:
|
if pos > -1:
|
||||||
@ -640,6 +791,12 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
else:
|
else:
|
||||||
# if not found, insert _ after "
|
# if not found, insert _ after "
|
||||||
new_html = new_html.replace(img[1:], '"_' + img[1:])
|
new_html = new_html.replace(img[1:], '"_' + img[1:])
|
||||||
|
else:
|
||||||
|
# insert to front
|
||||||
|
#print 'imgstr: ', img
|
||||||
|
pos = img.find('_')
|
||||||
|
new_html = new_html.replace(img[5:], '_' + img[5:])
|
||||||
|
|
||||||
elif url.rfind('life.mingpao.com') > -1:
|
elif url.rfind('life.mingpao.com') > -1:
|
||||||
imglist = re.findall('src=\'?.*?jpg\'', new_html)
|
imglist = re.findall('src=\'?.*?jpg\'', new_html)
|
||||||
br = mechanize.Browser()
|
br = mechanize.Browser()
|
||||||
@@ -673,9 +830,13 @@ class MPRecipe(BasicNewsRecipe):
                     newimg = img[0:pos+1] + '_' + img[pos+1:]
                     #print 'Use hi-res img', newimg
                     new_html = new_html.replace(img, newimg)
+        # test
+        #print new_html
         return new_html

     def preprocess_html(self, soup):
+        for mobiletitle in soup.findAll('font', attrs={'color': ['navy']}):
+            mobiletitle.name = 'h1'
         for item in soup.findAll(style=True):
             del item['style']
         for item in soup.findAll(style=True):
@@ -909,3 +1070,4 @@ class MPRecipe(BasicNewsRecipe):
         opf.render(opf_file, ncx_file)


+
recipes/monbiot.recipe (new file, +43)
@@ -0,0 +1,43 @@
+__license__   = 'GPL v3'
+__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.monbiot.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GeorgeMonbiot(BasicNewsRecipe):
+    title                 = 'George Monbiot - blog'
+    __author__            = 'Darko Miletic'
+    description           = 'Tell people something they know already and they will thank you for it. Tell people something new and they will hate you for it.'
+    publisher             = 'George Monbiot'
+    category              = 'news, politics, UK, World'
+    oldest_article        = 15
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en_GB'
+    remove_empty_feeds    = True
+    publication_type      = 'blog'
+    extra_css             = """
+                               body{font-family: Arial,Helvetica,sans-serif }
+                               img{margin-bottom: 0.4em; display:block}
+                            """
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags = [
+                    dict(name=['meta','link']),
+                    dict(attrs={'class':'shareinpost'}),
+                    dict(attrs={'id':'paging'})
+                  ]
+    remove_attributes = ['lang']
+    keep_only_tags = [dict(attrs={'id':'content'})]
+
+    feeds = [(u'Articles', u'http://www.monbiot.com/feed/atom/')]
@@ -2,20 +2,25 @@
 #!/usr/bin/env python

 __license__ = 'GPL v3'
-__copyright__ = '2010, matek09, matek09@gmail.com'
+__copyright__ = '2010, matek09, matek09@gmail.com; 2012, admroz, a.rozewicki@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
+from string import capwords
 import datetime


 class Newsweek(BasicNewsRecipe):
+
+    # how many issues to go back, 0 means get the most current one
+    BACK_ISSUES = 1
+
     EDITION = '0'
     DATE = None
     YEAR = datetime.datetime.now().year

     title = u'Newsweek Polska'
-    __author__ = 'matek09'
+    __author__ = 'matek09, admroz'
     description = 'Weekly magazine'
     encoding = 'utf-8'
     language = 'pl'
@@ -25,6 +30,9 @@ class Newsweek(BasicNewsRecipe):
     articles_are_obfuscated = True


+    #
+    # Parses each article
+    #
     def get_obfuscated_article(self, url):
         br = self.get_browser()
         br.open(url)
@@ -37,6 +45,27 @@ class Newsweek(BasicNewsRecipe):
         info = main_section.find('ul', attrs={'class' : 'articleInfo'})
         authors = info.find('li').find('h4')
         article = main_section.find('div', attrs={'id' : 'article'})
+
+        # remove related articles box
+        related = article.find('div', attrs={'class' : 'relatedBox'})
+        if related is not None:
+            related.extract()
+
+        # remove div with social networking links and links to
+        # other articles in web version
+        for div in article.findAll('div'):
+            if div.find('span', attrs={'class' : 'google-plus'}):
+                div.extract()
+
+            for p in div.findAll('p'):
+                if p.find('span', attrs={'style' : 'color: rgb(255, 0, 0);'}):
+                    p.extract()
+                    continue
+                for a in p.findAll('a'):
+                    if a.find('span', attrs={'style' : 'font-size: larger;'}):
+                        a.extract()
+
         html = unicode(title) + unicode(authors) + unicode(article)
         next = main_section.find('li', attrs={'class' : 'next'})

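Note: the cleanup leans on BeautifulSoup's extract(), which detaches a node from the parse tree in place. A toy run using the BeautifulSoup bundled with calibre (the import path is an assumption based on how calibre shipped it at the time):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<div id="article"><p>keep</p><div class="relatedBox">drop</div></div>')
    related = soup.find('div', attrs={'class': 'relatedBox'})
    if related is not None:
        related.extract()   # removes the node (and returns it)
    print(soup)             # -> <div id="article"><p>keep</p></div>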
@@ -59,32 +88,34 @@ class Newsweek(BasicNewsRecipe):
         self.temp_files[-1].close()
         return self.temp_files[-1].name

-    def is_full(self, issue_soup):
-        while True:
-            main_section = issue_soup.find(id='mainSection')
-            next = main_section.find('li', attrs={'class' : 'next'})
-            if len(main_section.findAll(attrs={'class' : 'locked'})) > 1:
-                return False
-            elif next is None:
-                return True
-            else:
-                issue_soup = self.index_to_soup(next.find('a')['href'])
-
-    def find_last_full_issue(self, archive_url):
+    #
+    # Goes back given number of issues. It also knows how to go back
+    # to the previous year if there are not enough issues in the current one
+    #
+    def find_last_issue(self, archive_url):
         archive_soup = self.index_to_soup(archive_url)
         select = archive_soup.find('select', attrs={'id' : 'paper_issue_select'})
-        for option in select.findAll(lambda tag: tag.name == 'option' and tag.has_key('value')):
-            self.EDITION = option['value'].replace('http://www.newsweek.pl/wydania/','')
-            issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
-            if self.is_full(issue_soup):
-                return
-
-        self.YEAR = self.YEAR - 1
-        self.find_last_full_issue(archive_url + ',' + str(self.YEAR))
+        options = select.findAll(lambda tag: tag.name == 'option' and tag.has_key('value'))
+
+        # check if need to go back to previous year
+        if len(options) > self.BACK_ISSUES:
+            option = options[self.BACK_ISSUES]
+            self.EDITION = option['value'].replace('http://www.newsweek.pl/wydania/','')
+            issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
+        else:
+            self.BACK_ISSUES = self.BACK_ISSUES - len(options)
+            self.YEAR = self.YEAR - 1
+            self.find_last_issue(archive_url + ',' + str(self.YEAR))

+    #
+    # Looks for the last issue which we want to download. Then goes on each
+    # section and article and stores them (assigning to sections)
+    #
     def parse_index(self):
         archive_url = 'http://www.newsweek.pl/wydania/archiwum'
-        self.find_last_full_issue(archive_url)
+        self.find_last_issue(archive_url)
         soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
         self.DATE = self.tag_to_string(soup.find('span', attrs={'class' : 'data'}))
         main_section = soup.find(id='mainSection')
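Note: find_last_issue now counts BACK_ISSUES entries down the archive's issue list and rolls over into the previous year's archive when the current year does not have enough issues. The arithmetic, modelled without any scraping (illustrative data only):

    def pick_issue(issues_by_year, year, back_issues):
        # issues_by_year maps year -> issue ids, newest first
        options = issues_by_year[year]
        while len(options) <= back_issues:
            back_issues -= len(options)   # skip past the whole year...
            year -= 1                     # ...and continue in the previous one
            options = issues_by_year[year]
        return options[back_issues]

    issues = {2012: ['12/12', '11/12', '10/12'], 2011: ['52/11', '51/11']}
    print(pick_issue(issues, 2012, 4))   # -> 51/11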
@@ -93,32 +124,44 @@ class Newsweek(BasicNewsRecipe):
         feeds = []
         articles = {}
         sections = []
-        while True:
-            news_list = main_section.find('ul', attrs={'class' : 'newsList'})
-            for h2 in news_list.findAll('h2'):
-                article = self.create_article(h2)
-                category_div = h2.findNext('div', attrs={'class' : 'kategorie'})
-                section = self.tag_to_string(category_div)
-
-                if articles.has_key(section):
-                    articles[section].append(article)
-                else:
-                    articles[section] = [article]
-                    sections.append(section)
-
-            next = main_section.find('li', attrs={'class' : 'next'})
-            if next is None:
-                break
-            soup = self.index_to_soup(next.find('a')['href'])
-            main_section = soup.find(id='mainSection')
+
+        news_list = main_section.find('ul', attrs={'class' : 'newsList'})
+        section = 'Inne'
+
+        for li in news_list.findAll('li'):
+            h3 = li.find('h3')
+            if h3 is not None:
+                section = capwords(self.tag_to_string(h3))
+                continue
+            else:
+                h2 = li.find('h2')
+                if h2 is not None:
+                    article = self.create_article(h2)
+                    if article is None :
+                        continue
+
+                    if articles.has_key(section):
+                        articles[section].append(article)
+                    else:
+                        articles[section] = [article]
+                        sections.append(section)

         for section in sections:
             feeds.append((section, articles[section]))
         return feeds

+    #
+    # Creates each article metadata (skips locked ones). The content will
+    # be extracted later by other method (get_obfuscated_article).
+    #
     def create_article(self, h2):
         article = {}
         a = h2.find('a')
+        if a is None:
+            return None
+
         article['title'] = self.tag_to_string(a)
         article['url'] = a['href']
         article['date'] = self.DATE
@@ -129,7 +172,3 @@ class Newsweek(BasicNewsRecipe):
         else:
             article['description'] = ''
         return article
-
-
-
-
@@ -1,12 +1,14 @@
-import re, mechanize
+import re, random
+
+from calibre import browser
 from calibre.web.feeds.recipes import BasicNewsRecipe

 class AdvancedUserRecipe1325006965(BasicNewsRecipe):

     title = u'The Sun UK'

     description = 'A Recipe for The Sun tabloid UK'
     __author__ = 'Dave Asbury'
-    # last updated 7/4/12
+    # last updated 29/4/12
     language = 'en_GB'
     oldest_article = 1
     max_articles_per_feed = 15
@ -48,12 +50,10 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News','http://feed43.com/2517447382644748.xml'),
|
(u'News', u'http://www.thesun.co.uk/sol/homepage/news/rss'),
|
||||||
(u'Sport', u'http://feed43.com/4283846255668687.xml'),
|
(u'Sport', u'http://www.thesun.co.uk/sol/homepage/sport/rss'),
|
||||||
(u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
|
(u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
|
||||||
(u'Film',u'http://feed43.com/1307545221226200.xml'),
|
(u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
|
||||||
(u'Music',u'http://feed43.com/1701513435064132.xml'),
|
|
||||||
(u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
@ -61,14 +61,11 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
# look for the block containing the sun button and url
|
# look for the block containing the sun button and url
|
||||||
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_84.gif);'})
|
cov = soup.find(attrs={'style' : 'background-image: url(http://www.politicshome.com/images/sources/source_frontpage_button_84.gif);'})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#cov = soup.find(attrs={'id' : 'large'})
|
#cov = soup.find(attrs={'id' : 'large'})
|
||||||
cov2 = str(cov)
|
cov2 = str(cov)
|
||||||
|
|
||||||
cov2='http://www.politicshome.com'+cov2[9:-133]
|
cov2='http://www.politicshome.com'+cov2[9:-133]
|
||||||
#cov2 now contains url of the page containing pic
|
#cov2 now contains url of the page containing pic
|
||||||
|
|
||||||
#cov2 now contains url of the page containing pic
|
#cov2 now contains url of the page containing pic
|
||||||
soup = self.index_to_soup(cov2)
|
soup = self.index_to_soup(cov2)
|
||||||
cov = soup.find(attrs={'id' : 'large'})
|
cov = soup.find(attrs={'id' : 'large'})
|
||||||
@@ -76,16 +73,21 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
         cov2=cov2[27:-18]
         #cov2 now is pic url, now go back to original function

-        br = mechanize.Browser()
+        br = browser()
         br.set_handle_redirect(False)
         try:
             br.open_novisit(cov2)
             cover_url = cov2
         except:
-            cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
+            cover_url = random.choice((
+                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage6_677961a_905507a.jpg'
+                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage7_677962a_905505a.jpg'
+                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage5_677960a_905512a.jpg'
+                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage2_677957a_905502a.jpg'
+                ,'http://img.thesun.co.uk/multimedia/archive/00905/errorpage3_677958a_905503a.jpg'
+            ))

-        #cover_url = cov2
-        #cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
         return cover_url
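Note: the fallback cover is now drawn at random from a pool of stock error-page images instead of a fixed masthead, so repeated fetch failures do not always yield the identical cover:

    import random

    fallbacks = ('errorpage2.jpg', 'errorpage3.jpg', 'errorpage5.jpg')   # stand-ins for the URLs above
    print(random.choice(fallbacks))   # any one of the pool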
recipes/vice_magazine.recipe (new file, +17)
@@ -0,0 +1,17 @@
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ViceESRecipe(BasicNewsRecipe):
+    title          = u'Vice Magazine España'
+    __author__     = 'atordo'
+    description    = u'La página web oficial de la revista Vice España'
+    category       = u'noticias, fotografía, blogs, moda, arte, cine, música, literatura, tecnología'
+    cover_url      = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif'
+
+    oldest_article = 20
+    max_articles_per_feed = 30
+    auto_cleanup   = True
+    no_stylesheets = True
+    language       = 'es'
+
+    feeds = [('Vice', 'http://www.vice.com/es/rss')]
@@ -445,7 +445,7 @@ class LRFMetadataWriter(MetadataWriterPlugin):
 class MOBIMetadataWriter(MetadataWriterPlugin):

     name = 'Set MOBI metadata'
-    file_types = set(['mobi', 'prc', 'azw', 'azw4'])
+    file_types = set(['mobi', 'prc', 'azw', 'azw3', 'azw4'])
     description = _('Set metadata in %s files')%'MOBI'
     author = 'Marshall T. Vandegrift'

@@ -539,7 +539,8 @@ from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
 from calibre.ebooks.conversion.plugins.fb2_output import FB2Output
 from calibre.ebooks.conversion.plugins.lit_output import LITOutput
 from calibre.ebooks.conversion.plugins.lrf_output import LRFOutput
-from calibre.ebooks.conversion.plugins.mobi_output import MOBIOutput
+from calibre.ebooks.conversion.plugins.mobi_output import (MOBIOutput,
+        AZW3Output)
 from calibre.ebooks.conversion.plugins.oeb_output import OEBOutput
 from calibre.ebooks.conversion.plugins.pdb_output import PDBOutput
 from calibre.ebooks.conversion.plugins.pdf_output import PDFOutput
@@ -580,7 +581,7 @@ plugins += [
     FB2Output,
     LITOutput,
     LRFOutput,
-    MOBIOutput,
+    MOBIOutput, AZW3Output,
     OEBOutput,
     PDBOutput,
     PDFOutput,
@@ -1253,6 +1254,15 @@ class StoreBeWriteStore(StoreBase):
     headquarters = 'US'
     formats = ['EPUB', 'MOBI', 'PDF']

+class StoreBiblioStore(StoreBase):
+    name = u'Библио.бг'
+    author = 'Alex Stanev'
+    description = u'Електронна книжарница за книги и списания във формати ePUB и PDF. Част от заглавията са с активна DRM защита.'
+    actual_plugin = 'calibre.gui2.store.stores.biblio_plugin:BiblioStore'
+
+    headquarters = 'BG'
+    formats = ['EPUB', 'PDF']
+
 class StoreBookotekaStore(StoreBase):
     name = 'Bookoteka'
     author = u'Tomasz Długosz'
@@ -1596,6 +1606,7 @@ plugins += [
     StoreBNStore,
     StoreBeamEBooksDEStore,
     StoreBeWriteStore,
+    StoreBiblioStore,
     StoreBookotekaStore,
     StoreChitankaStore,
     StoreDieselEbooksStore,
@@ -54,6 +54,15 @@ Run an embedded python interpreter.
     parser.add_option('-m', '--inspect-mobi', action='store_true',
                       default=False,
                       help='Inspect the MOBI file(s) at the specified path(s)')
+    parser.add_option('--tweak-book', default=None,
+                      help='Tweak the book (exports the book as a collection of HTML '
+                      'files and metadata, which you can edit using standard HTML '
+                      'editing tools, and then rebuilds the file from the edited HTML. '
+                      'Makes no additional changes to the HTML, unlike a full calibre '
+                      'conversion). Note that this tool will try to open the '
+                      'folder containing the HTML files in the editor pointed to by the'
+                      ' EDITOR environment variable.')
+
     parser.add_option('--test-build', help='Test binary modules in build',
                       action='store_true', default=False)

@@ -239,7 +248,9 @@ def main(args=sys.argv):
             prints('Inspecting:', path)
             inspect_mobi(path)
             print
+    elif opts.tweak_book:
+        from calibre.ebooks.tweak import tweak
+        tweak(opts.tweak_book)
     elif opts.test_build:
         from calibre.test_build import test
         test()
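Note: based only on the calls visible in this hunk, the new flag should be invokable as calibre-debug --tweak-book /path/to/book, which amounts to:

    # Assumed usage; this mirrors the two lines added to main() above.
    from calibre.ebooks.tweak import tweak

    tweak('/path/to/book.epub')   # explode to HTML, open $EDITOR, rebuild on exit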
|
@ -6,8 +6,32 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
OptionRecommendation)
|
||||||
|
|
||||||
|
def remove_html_cover(oeb, log):
|
||||||
|
from calibre.ebooks.oeb.base import OEB_DOCS
|
||||||
|
|
||||||
|
if not oeb.metadata.cover \
|
||||||
|
or 'cover' not in oeb.guide:
|
||||||
|
return
|
||||||
|
href = oeb.guide['cover'].href
|
||||||
|
del oeb.guide['cover']
|
||||||
|
item = oeb.manifest.hrefs[href]
|
||||||
|
if item.spine_position is not None:
|
||||||
|
log.warn('Found an HTML cover: ', item.href, 'removing it.',
|
||||||
|
'If you find some content missing from the output MOBI, it '
|
||||||
|
'is because you misidentified the HTML cover in the input '
|
||||||
|
'document')
|
||||||
|
oeb.spine.remove(item)
|
||||||
|
if item.media_type in OEB_DOCS:
|
||||||
|
oeb.manifest.remove(item)
|
||||||
|
|
||||||
|
def extract_mobi(output_path, opts):
|
||||||
|
if opts.extract_to is not None:
|
||||||
|
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
||||||
|
ddir = opts.extract_to
|
||||||
|
inspect_mobi(output_path, ddir=ddir)
|
||||||
|
|
||||||
class MOBIOutput(OutputFormatPlugin):
|
class MOBIOutput(OutputFormatPlugin):
|
||||||
|
|
||||||
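Note: remove_html_cover and extract_mobi move from MOBIOutput methods to module-level functions so that the new AZW3Output plugin (added at the end of this file) can share them. The refactoring pattern, reduced to a sketch:

    # A method tied to one class becomes a function that takes its former
    # self.* dependencies as explicit arguments, so any caller can reuse it.
    def remove_html_cover(oeb, log):
        pass   # body as in the diff above

    class MOBIOutput(object):
        def convert(self, oeb, log):
            remove_html_cover(oeb, log)    # was self.remove_html_cover()

    class AZW3Output(object):
        def convert(self, oeb, log):
            remove_html_cover(oeb, log)    # reuse without inheritance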
@@ -140,25 +164,6 @@ class MOBIOutput(OutputFormatPlugin):
             # Fix up the periodical href to point to first section href
             toc.nodes[0].href = toc.nodes[0].nodes[0].href

-    def remove_html_cover(self):
-        from calibre.ebooks.oeb.base import OEB_DOCS
-
-        oeb = self.oeb
-        if not oeb.metadata.cover \
-           or 'cover' not in oeb.guide:
-            return
-        href = oeb.guide['cover'].href
-        del oeb.guide['cover']
-        item = oeb.manifest.hrefs[href]
-        if item.spine_position is not None:
-            self.log.warn('Found an HTML cover: ', item.href, 'removing it.',
-                'If you find some content missing from the output MOBI, it '
-                'is because you misidentified the HTML cover in the input '
-                'document')
-            oeb.spine.remove(item)
-            if item.media_type in OEB_DOCS:
-                self.oeb.manifest.remove(item)
-
     def convert(self, oeb, output_path, input_plugin, opts, log):
         from calibre.utils.config import tweaks
         from calibre.ebooks.mobi.writer2.resources import Resources
@ -169,7 +174,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
mobi_type = 'old' # Amazon does not support KF8 periodicals
|
mobi_type = 'old' # Amazon does not support KF8 periodicals
|
||||||
create_kf8 = mobi_type in ('new', 'both')
|
create_kf8 = mobi_type in ('new', 'both')
|
||||||
|
|
||||||
self.remove_html_cover()
|
remove_html_cover(self.oeb, self.log)
|
||||||
resources = Resources(oeb, opts, self.is_periodical,
|
resources = Resources(oeb, opts, self.is_periodical,
|
||||||
add_fonts=create_kf8)
|
add_fonts=create_kf8)
|
||||||
self.check_for_periodical()
|
self.check_for_periodical()
|
||||||
@ -185,7 +190,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
) if create_kf8 else None
|
) if create_kf8 else None
|
||||||
if mobi_type == 'new':
|
if mobi_type == 'new':
|
||||||
kf8.write(output_path)
|
kf8.write(output_path)
|
||||||
self.extract_mobi(output_path, opts)
|
extract_mobi(output_path, opts)
|
||||||
return
|
return
|
||||||
|
|
||||||
self.log('Creating MOBI 6 output')
|
self.log('Creating MOBI 6 output')
|
||||||
@ -225,11 +230,72 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
writer = MobiWriter(opts, resources, kf8,
|
writer = MobiWriter(opts, resources, kf8,
|
||||||
write_page_breaks_after_item=write_page_breaks_after_item)
|
write_page_breaks_after_item=write_page_breaks_after_item)
|
||||||
writer(oeb, output_path)
|
writer(oeb, output_path)
|
||||||
self.extract_mobi(output_path, opts)
|
extract_mobi(output_path, opts)
|
||||||
|
|
||||||
|
class AZW3Output(OutputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'AZW3 Output'
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
file_type = 'azw3'
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
OptionRecommendation(name='prefer_author_sort',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('When present, use author sort field as author.')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='no_inline_toc',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Don\'t add Table of Contents to the book. Useful if '
|
||||||
|
'the book has its own table of contents.')),
|
||||||
|
OptionRecommendation(name='toc_title', recommended_value=None,
|
||||||
|
help=_('Title for any generated in-line table of contents.')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='dont_compress',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Disable compression of the file contents.')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='personal_doc', recommended_value='[PDOC]',
|
||||||
|
help=_('Tag marking book to be filed with Personal Docs')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='mobi_toc_at_start',
|
||||||
|
recommended_value=False,
|
||||||
|
help=_('When adding the Table of Contents to the book, add it at the start of the '
|
||||||
|
'book instead of the end. Not recommended.')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='extract_to', recommended_value=None,
|
||||||
|
help=_('Extract the contents of the MOBI file to the'
|
||||||
|
' specified directory. If the directory already '
|
||||||
|
'exists, it will be deleted.')
|
||||||
|
),
|
||||||
|
OptionRecommendation(name='share_not_sync', recommended_value=False,
|
||||||
|
help=_('Enable sharing of book content via Facebook etc. '
|
||||||
|
' on the Kindle. WARNING: Using this feature means that '
|
||||||
|
' the book will not auto sync its last read position '
|
||||||
|
' on multiple devices. Complain to Amazon.')
|
||||||
|
),
|
||||||
|
])
|
||||||
|
|
||||||
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
||||||
|
from calibre.ebooks.mobi.writer2.resources import Resources
|
||||||
|
from calibre.ebooks.mobi.writer8.main import create_kf8_book
|
||||||
|
|
||||||
|
self.oeb, self.opts, self.log = oeb, opts, log
|
||||||
|
opts.mobi_periodical = self.is_periodical
|
||||||
|
passthrough = getattr(opts, 'mobi_passthrough', False)
|
||||||
|
|
||||||
|
resources = Resources(self.oeb, self.opts, self.is_periodical,
|
||||||
|
add_fonts=True, process_images=False)
|
||||||
|
if not passthrough:
|
||||||
|
remove_html_cover(self.oeb, self.log)
|
||||||
|
|
||||||
|
# Split on pagebreaks so that the resulting KF8 works better with
|
||||||
|
# calibre's viewer, which does not support CSS page breaks
|
||||||
|
from calibre.ebooks.oeb.transforms.split import Split
|
||||||
|
Split()(self.oeb, self.opts)
|
||||||
|
|
||||||
|
kf8 = create_kf8_book(self.oeb, self.opts, resources, for_joint=False)
|
||||||
|
|
||||||
|
kf8.write(output_path)
|
||||||
|
extract_mobi(output_path, opts)
|
||||||
|
|
||||||
def extract_mobi(self, output_path, opts):
|
|
||||||
if opts.extract_to is not None:
|
|
||||||
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
|
||||||
ddir = opts.extract_to
|
|
||||||
inspect_mobi(output_path, ddir=ddir)
|
|
||||||
|
|
||||||
|
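Note: the cover-removal and debug-extraction logic moves from MOBIOutput methods to module-level functions so the new AZW3Output plugin can reuse them. Illustrative only, assuming remove_html_cover/extract_mobi from this module are in scope, both convert() paths reduce to this shared shape:

    def finish(oeb, log, write, output_path, opts):
        remove_html_cover(oeb, log)      # drop a cover page marked in the OEB guide
        write(output_path)               # serialize MOBI 6 and/or KF8
        extract_mobi(output_path, opts)  # honour extract_to for debugging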
@@ -179,7 +179,11 @@ class HeuristicProcessor(object):
         for match in re.finditer(pat, search_text):
             ital_string = str(match.group('words'))
             #self.log.debug("italicising "+str(match.group(0))+" with <i>"+ital_string+"</i>")
-            html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
+            try:
+                html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
+            except OverflowError:
+                # match.group(0) was too large to be compiled into a regex
+                continue
         return html
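Note: the guard matters because re.escape of a very long match can produce a pattern Python's regex compiler rejects with OverflowError; the hunk skips such matches instead of aborting the conversion. A self-contained sketch of the same guard:

    import re

    def italicize(word, html):
        # Mirrors the guarded substitution above; on OverflowError the
        # text is simply left un-italicised
        try:
            return re.sub(re.escape(word), '<i>%s</i>' % word, html)
        except OverflowError:
            return html

    print(italicize('emphasis', 'some emphasis here'))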
@@ -141,9 +141,10 @@ class MOBIFile(object):
             self.files.append(File(skel, skeleton, ftext, first_aid, sections))

     def dump_flows(self, ddir):
-        if self.fdst is None:
-            raise ValueError('This MOBI file has no FDST record')
-        for i, x in enumerate(self.fdst.sections):
+        boundaries = [(0, len(self.raw_text))]
+        if self.fdst is not None:
+            boundaries = self.fdst.sections
+        for i, x in enumerate(boundaries):
             start, end = x
             raw = self.raw_text[start:end]
             with open(os.path.join(ddir, 'flow%04d.txt'%i), 'wb') as f:
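Note: dump_flows now degrades gracefully; a file without an FDST record is treated as a single flow spanning the whole text instead of raising. The fallback in isolation:

    import os

    def dump_flows(raw_text, fdst, ddir):
        # One flow covering everything when there is no FDST record
        boundaries = [(0, len(raw_text))] if fdst is None else fdst.sections
        for i, (start, end) in enumerate(boundaries):
            with open(os.path.join(ddir, 'flow%04d.txt' % i), 'wb') as f:
                f.write(raw_text[start:end])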
@@ -234,6 +234,22 @@ class MetadataHeader(BookHeader):
         else:
             self.exth = None

+    @property
+    def kf8_type(self):
+        if (self.mobi_version == 8 and getattr(self, 'skelidx', NULL_INDEX) !=
+                NULL_INDEX):
+            return u'standalone'
+
+        kf8_header_index = getattr(self.exth, 'kf8_header', None)
+        if kf8_header_index is None:
+            return None
+        try:
+            if self.section_data(kf8_header_index-1) == b'BOUNDARY':
+                return u'joint'
+        except:
+            pass
+        return None
+
     def identity(self):
         self.stream.seek(60)
         ident = self.stream.read(8).upper()
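Note: kf8_type classifies a file as u'standalone' (pure KF8, mobi_version 8 with a skeleton index), u'joint' (MOBI 6 plus KF8, detected via the EXTH kf8_header pointer sitting just after a BOUNDARY record), or None (no KF8 at all). A probing sketch, using the same imports as the new tweak module below (file path hypothetical):

    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.utils.logging import default_log

    with open('book.azw3', 'rb') as stream:
        header = MetadataHeader(stream, default_log)
        print(header.kf8_type)  # u'standalone', u'joint' or None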
src/calibre/ebooks/mobi/tweak.py (new file, 84 lines)
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, glob
+
+from calibre import CurrentDir
+from calibre.ebooks.mobi import MobiError
+from calibre.ebooks.mobi.reader.mobi6 import MobiReader
+from calibre.ebooks.mobi.reader.headers import MetadataHeader
+from calibre.utils.logging import default_log
+from calibre.ebooks import DRMError
+from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
+from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
+from calibre.customize.ui import (plugin_for_input_format,
+        plugin_for_output_format)
+from calibre.utils.ipc.simple_worker import fork_job
+
+class BadFormat(ValueError):
+    pass
+
+def do_explode(path, dest):
+    with open(path, 'rb') as stream:
+        mr = MobiReader(stream, default_log, None, None)
+
+        with CurrentDir(dest):
+            mr = Mobi8Reader(mr, default_log)
+            opf = os.path.abspath(mr())
+
+    return opf
+
+def explode(path, dest, question=lambda x:True):
+    with open(path, 'rb') as stream:
+        raw = stream.read(3)
+        stream.seek(0)
+        if raw == b'TPZ':
+            raise BadFormat(_('This is not a MOBI file. It is a Topaz file.'))
+
+        try:
+            header = MetadataHeader(stream, default_log)
+        except MobiError:
+            raise BadFormat(_('This is not a MOBI file.'))
+
+        if header.encryption_type != 0:
+            raise DRMError(_('This file is locked with DRM. It cannot be tweaked.'))
+
+        kf8_type = header.kf8_type
+
+        if kf8_type is None:
+            raise BadFormat('This MOBI file does not contain a KF8 format book')
+
+        if kf8_type == 'joint':
+            if not question(_('This MOBI file contains both KF8 and '
+                'older Mobi6 data. Tweaking it will remove the Mobi6 data, which '
+                'means the file will not be usable on older Kindles. Are you '
+                'sure?')):
+                return None
+
+    return fork_job('calibre.ebooks.mobi.tweak', 'do_explode', args=(path,
+        dest), no_output=True)['result']
+
+def do_rebuild(opf, dest_path):
+    plumber = Plumber(opf, dest_path, default_log)
+    plumber.setup_options()
+    inp = plugin_for_input_format('azw3')
+    outp = plugin_for_output_format('azw3')
+
+    plumber.opts.mobi_passthrough = True
+    oeb = create_oebbook(default_log, opf, plumber.opts)
+    outp.convert(oeb, dest_path, inp, plumber.opts, default_log)
+
+def rebuild(src_dir, dest_path):
+    opf = glob.glob(os.path.join(src_dir, '*.opf'))
+    if not opf:
+        raise ValueError('No OPF file found in %s'%src_dir)
+    opf = opf[0]
+    fork_job('calibre.ebooks.mobi.tweak', 'do_rebuild', args=(opf, dest_path),
+        no_output=True)
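Note: both halves of the round trip run in a forked worker via fork_job, so a crash in the conversion machinery cannot take down the caller. Usage sketch (paths hypothetical):

    from calibre.ebooks.mobi.tweak import explode, rebuild

    opf = explode('book.azw3', '/tmp/exploded')  # OPF path, or None if declined
    # ... hand-edit the HTML/OPF under /tmp/exploded ...
    rebuild('/tmp/exploded', 'tweaked.azw3')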
@@ -25,6 +25,15 @@ from calibre.ebooks.mobi.writer2.indexer import Indexer
 WRITE_UNCROSSABLE_BREAKS = False
 NULL_INDEX = 0xffffffff

+FLIS = (b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+
+            b'\xff'*4)
+
+def fcis(text_length):
+    fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
+    fcis += pack(b'>I', text_length)
+    fcis += b'\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
+    return fcis
+
 class MobiWriter(object):

     def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
@@ -208,14 +217,9 @@ class MobiWriter(object):

         # FCIS/FLIS (Seems to serve no purpose)
         flis_number = len(self.records)
-        self.records.append(
-            b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+
-            b'\xff'*4)
-        fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
-        fcis += pack(b'>I', self.text_length)
-        fcis += b'\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
+        self.records.append(FLIS)
         fcis_number = len(self.records)
-        self.records.append(fcis)
+        self.records.append(fcis(self.text_length))

         # EOF record
         self.records.append(b'\xE9\x8E\x0D\x0A')
@@ -379,6 +383,12 @@ class MobiWriter(object):
         self.resources.serialize(self.records, used_images)
         resource_record_count = len(self.records) - old

+        # FCIS/FLIS (Seems to serve no purpose)
+        flis_number = len(self.records)
+        self.records.append(FLIS)
+        fcis_number = len(self.records)
+        self.records.append(fcis(self.text_length))
+
         # Insert KF8 records
         self.records.append(b'BOUNDARY')
         kf8_header_index = len(self.records)
@@ -398,6 +408,8 @@ class MobiWriter(object):
         header_fields['exth_flags'] = 0b100001010000 # Kinglegen uses this
         header_fields['fdst_record'] = NULL_INDEX
         header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
+        header_fields['flis_record'] = flis_number
+        header_fields['fcis_record'] = fcis_number
         extra_data_flags = 0b1 # Has multibyte overlap bytes
         if self.primary_index_record_idx is not None:
             extra_data_flags |= 0b10
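Note: the refactor replaces two inline blobs with a shared FLIS constant and an fcis(text_length) helper; the only dynamic part of an FCIS record is the uncompressed text length, packed as a big-endian 32-bit integer at byte offset 20. A quick check of that layout:

    from struct import pack

    rec = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
    rec += pack(b'>I', 123456)  # uncompressed text length
    assert rec[20:24] == b'\x00\x01\xe2\x40'  # 123456 as a big-endian u32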
@@ -19,9 +19,11 @@ PLACEHOLDER_GIF = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\

 class Resources(object):

-    def __init__(self, oeb, opts, is_periodical, add_fonts=False):
+    def __init__(self, oeb, opts, is_periodical, add_fonts=False,
+            process_images=True):
         self.oeb, self.log, self.opts = oeb, oeb.log, opts
         self.is_periodical = is_periodical
+        self.process_images = process_images

         self.item_map = {}
         self.records = []
@@ -34,6 +36,8 @@ class Resources(object):
         self.add_resources(add_fonts)

     def process_image(self, data):
+        if not self.process_images:
+            return data
         return (mobify_image(data) if self.opts.mobi_keep_original_images else
                     rescale_image(data))
@@ -18,6 +18,14 @@ from calibre.ebooks.mobi.writer8.exth import build_exth
 from calibre.utils.filenames import ascii_filename

 NULL_INDEX = 0xffffffff
+FLIS = b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+ b'\xff'*4
+
+def fcis(text_length):
+    fcis = b'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x02\x00\x00\x00\x00'
+    fcis += pack(b'>L', text_length)
+    fcis += b'\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00'
+    fcis += b'\x28\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
+    return fcis
+
 class MOBIHeader(Header): # {{{
     '''
@@ -115,7 +123,10 @@ class MOBIHeader(Header): # {{{
     exth_flags = DYN

     # 132: Unknown
-    unknown = zeroes(36)
+    unknown = zeroes(32)
+
+    # 164: Unknown
+    unknown_index = NULL

     # 168: DRM
     drm_offset = NULL
@@ -130,13 +141,13 @@ class MOBIHeader(Header): # {{{
     fdst_record = DYN
     fdst_count = DYN

-    # 200: FCI
-    fcis_record = NULL
-    fcis_count
+    # 200: FCIS
+    fcis_record = DYN
+    fcis_count = 1

     # 208: FLIS
-    flis_record = NULL
-    flis_count
+    flis_record = DYN
+    flis_count = 1

     # 216: Unknown
     unknown3 = zeroes(8)
@@ -193,7 +204,7 @@ HEADER_FIELDS = {'compression', 'text_length', 'last_text_record', 'book_type',
                 'first_resource_record', 'exth_flags', 'fdst_record',
                 'fdst_count', 'ncx_index', 'chunk_index', 'skel_index',
                 'guide_index', 'exth', 'full_title', 'extra_data_flags',
-                'uid'}
+                'flis_record', 'fcis_record', 'uid'}

 class KF8Book(object):

@@ -241,6 +252,12 @@ class KF8Book(object):
         self.fdst_record = len(self.records)
         self.records.extend(writer.fdst_records)

+        # FLIS/FCIS
+        self.flis_record = len(self.records)
+        self.records.append(FLIS)
+        self.fcis_record = len(self.records)
+        self.records.append(fcis(self.text_length))
+
         # EOF
         self.records.append(b'\xe9\x8e\r\n') # EOF record
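Note: with fcis_record and flis_record now DYN instead of fixed NULL, KF8Book captures the index of each record as it appends it, exactly as MobiWriter does, and the indices flow into the header via the extended HEADER_FIELDS set. The bookkeeping pattern in miniature (placeholder bytes for brevity):

    records = [b'...text records...']
    flis_record = len(records)    # index the header will point at
    records.append(b'FLIS...')    # placeholder for the FLIS blob
    fcis_record = len(records)    # likewise for FCIS
    records.append(b'FCIS...')    # placeholder for fcis(text_length)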
@@ -13,7 +13,7 @@ from functools import partial

 from lxml import etree

-from calibre.ebooks.oeb.base import XHTML_NS
+from calibre.ebooks.oeb.base import XHTML_NS, extract
 from calibre.constants import ispy3
 from calibre.ebooks.mobi.utils import to_base

@@ -224,14 +224,24 @@ class Chunker(object):
         nroot.text = root.text
         nroot.tail = '\n'

-        for tag in root.iterdescendants(etree.Element):
-            # We are ignoring all non tag entities in the tree
-            # like comments and processing instructions, as they make the
-            # chunking code even harder, for minimal gain.
-            elem = nroot.makeelement(tag.tag.rpartition('}')[-1],
+        # Remove Comments and ProcessingInstructions as kindlegen seems to
+        # remove them as well
+        for tag in root.iterdescendants():
+            if tag.tag in {etree.Comment, etree.ProcessingInstruction}:
+                extract(tag)
+
+        for tag in root.iterdescendants():
+            if tag.tag == etree.Entity:
+                elem = etree.Entity(tag.name)
+            else:
+                tn = tag.tag
+                if tn is not None:
+                    tn = tn.rpartition('}')[-1]
+                elem = nroot.makeelement(tn,
                     attrib={k.rpartition('}')[-1]:v for k, v in
                         tag.attrib.iteritems()})
-            elem.text, elem.tail = tag.text, tag.tail
+            elem.text = tag.text
+            elem.tail = tag.tail
             parent = node_from_path(nroot, path_to_node(tag.getparent()))
             parent.append(elem)

@@ -251,6 +261,11 @@ class Chunker(object):
         # Now loop over children
         for child in list(tag):
             raw = tostring(child, with_tail=False)
+            if child.tag == etree.Entity:
+                chunks.append(raw)
+                if child.tail:
+                    chunks.extend(self.chunk_up_text(child.tail, aid))
+                continue
             raw = close_self_closing_tags(raw)
             if len(raw) > CHUNK_SIZE and child.get('aid', None):
                 self.step_into_tag(child, chunks)
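Note: calibre's extract() (imported in the first hunk) detaches a node while splicing its tail text back into the tree, which is why comments and processing instructions can be dropped without losing surrounding content. A standalone sketch of the same operation with plain lxml:

    from lxml import etree

    root = etree.fromstring('<a>x<!-- comment -->y<?pi data?></a>')
    for node in list(root.iterdescendants()):
        if node.tag in (etree.Comment, etree.ProcessingInstruction):
            # preserve the node's tail text, then drop the node
            prev, parent, tail = node.getprevious(), node.getparent(), node.tail or ''
            if prev is None:
                parent.text = (parent.text or '') + tail
            else:
                prev.tail = (prev.tail or '') + tail
            parent.remove(node)
    print(etree.tostring(root))  # b'<a>xy</a>'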
src/calibre/ebooks/tweak.py (new file, 137 lines)
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import sys, os, shlex, subprocess
+
+from calibre import prints, as_unicode, walk
+from calibre.constants import iswindows, __appname__
+from calibre.ptempfile import TemporaryDirectory
+from calibre.libunzip import extract as zipextract
+from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZIP_STORED
+from calibre.utils.ipc.simple_worker import WorkerError
+
+class Error(ValueError):
+    pass
+
+def ask_cli_question(msg):
+    prints(msg, end=' [y/N]: ')
+    sys.stdout.flush()
+
+    if iswindows:
+        import msvcrt
+        ans = msvcrt.getch()
+    else:
+        import tty, termios
+        old_settings = termios.tcgetattr(sys.stdin.fileno())
+        try:
+            tty.setraw(sys.stdin.fileno())
+            try:
+                ans = sys.stdin.read(1)
+            except KeyboardInterrupt:
+                ans = b''
+        finally:
+            termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN, old_settings)
+    print()
+    return ans == b'y'
+
+def mobi_exploder(path, tdir, question=lambda x:True):
+    from calibre.ebooks.mobi.tweak import explode, BadFormat
+    try:
+        return explode(path, tdir, question=question)
+    except BadFormat as e:
+        raise Error(as_unicode(e))
+
+def zip_exploder(path, tdir, question=lambda x:True):
+    zipextract(path, tdir)
+    for f in walk(tdir):
+        if f.lower().endswith('.opf'):
+            return f
+    raise Error('Invalid book: Could not find .opf')
+
+def zip_rebuilder(tdir, path):
+    with ZipFile(path, 'w', compression=ZIP_DEFLATED) as zf:
+        # Write mimetype
+        mt = os.path.join(tdir, 'mimetype')
+        if os.path.exists(mt):
+            zf.write(mt, 'mimetype', compress_type=ZIP_STORED)
+        # Write everything else
+        exclude_files = {'.DS_Store', 'mimetype', 'iTunesMetadata.plist'}
+        for root, dirs, files in os.walk(tdir):
+            for fn in files:
+                if fn in exclude_files:
+                    continue
+                absfn = os.path.join(root, fn)
+                zfn = os.path.relpath(absfn, tdir).replace(os.sep, '/')
+                zf.write(absfn, zfn)
+
+def get_tools(fmt):
+    fmt = fmt.lower()
+
+    if fmt in {'mobi', 'azw', 'azw3'}:
+        from calibre.ebooks.mobi.tweak import rebuild
+        ans = mobi_exploder, rebuild
+    elif fmt in {'epub', 'htmlz'}:
+        ans = zip_exploder, zip_rebuilder
+    else:
+        ans = None, None
+
+    return ans
+
+def tweak(ebook_file):
+    ''' Command line interface to the Tweak Book tool '''
+    fmt = ebook_file.rpartition('.')[-1].lower()
+    exploder, rebuilder = get_tools(fmt)
+    if exploder is None:
+        prints('Cannot tweak %s files. Supported formats are: EPUB, HTMLZ, AZW3, MOBI'
+                , file=sys.stderr)
+        raise SystemExit(1)
+
+    with TemporaryDirectory('_tweak_'+
+            os.path.basename(ebook_file).rpartition('.')[0]) as tdir:
+        try:
+            opf = exploder(ebook_file, tdir, question=ask_cli_question)
+        except WorkerError as e:
+            prints('Failed to unpack', ebook_file)
+            prints(e.orig_tb)
+            raise SystemExit(1)
+        except Error as e:
+            prints(as_unicode(e), file=sys.stderr)
+            raise SystemExit(1)
+
+        if opf is None:
+            # The question was answered with No
+            return
+
+        ed = os.environ.get('EDITOR', None)
+        proceed = False
+        if ed is None:
+            prints('Book extracted to', tdir)
+            prints('Make your tweaks and once you are done,', __appname__,
+                    'will rebuild', ebook_file, 'from', tdir)
+            print()
+            proceed = ask_cli_question('Rebuild ' + ebook_file + '?')
+        else:
+            cmd = shlex.split(ed)
+            try:
+                subprocess.check_call(cmd + [tdir])
+            except:
+                prints(ed, 'failed, aborting...')
+                raise SystemExit(1)
+            proceed = True
+
+        if proceed:
+            prints('Rebuilding', ebook_file, 'please wait ...')
+            try:
+                rebuilder(tdir, ebook_file)
+            except WorkerError as e:
+                prints('Failed to rebuild', ebook_file)
+                prints(e.orig_tb)
+                raise SystemExit(1)
+            prints(ebook_file, 'successfully tweaked')
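Note: for zip-based formats the rebuilder writes the mimetype entry first and uncompressed, as the EPUB OCF container convention requires. End to end, the CLI flow honours $EDITOR; with it set, tweak() opens the exploded folder in the editor and rebuilds when the editor exits. A usage sketch (editor command and path hypothetical):

    import os
    os.environ['EDITOR'] = 'gvim -f'  # any blocking editor command

    from calibre.ebooks.tweak import tweak
    tweak('book.epub')  # explode, edit, rebuild in place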
src/calibre/gui2/convert/azw3_output.py (new file, 33 lines)
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.gui2.convert.azw3_output_ui import Ui_Form
+from calibre.gui2.convert import Widget
+
+font_family_model = None
+
+class PluginWidget(Widget, Ui_Form):
+
+    TITLE = _('AZW3 Output')
+    HELP = _('Options specific to')+' AZW3 '+_('output')
+    COMMIT_NAME = 'azw3_output'
+    ICON = I('mimetypes/mobi.png')
+
+    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
+        Widget.__init__(self, parent,
+                ['prefer_author_sort', 'toc_title',
+                'mobi_ignore_margins', 'mobi_toc_at_start',
+                'dont_compress', 'no_inline_toc', 'share_not_sync',
+                'personal_doc']#, 'mobi_navpoints_only_deepest']
+                )
+        self.db, self.book_id = db, book_id
+
+        self.initialize_options(get_option, get_help, db, book_id)
src/calibre/gui2/convert/azw3_output.ui (new file, 125 lines)
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Form</class>
+ <widget class="QWidget" name="Form">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>588</width>
+    <height>342</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QGridLayout" name="gridLayout">
+   <item row="4" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_prefer_author_sort">
+     <property name="text">
+      <string>Use author &amp;sort for author</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QLabel" name="label">
+     <property name="text">
+      <string>&amp;Title for Table of Contents:</string>
+     </property>
+     <property name="buddy">
+      <cstring>opt_toc_title</cstring>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="1">
+    <widget class="QLineEdit" name="opt_toc_title"/>
+   </item>
+   <item row="5" column="0">
+    <widget class="QCheckBox" name="opt_dont_compress">
+     <property name="text">
+      <string>Disable compression of the file contents</string>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="0">
+    <widget class="QCheckBox" name="opt_no_inline_toc">
+     <property name="text">
+      <string>Do not add Table of Contents to book</string>
+     </property>
+    </widget>
+   </item>
+   <item row="2" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_mobi_toc_at_start">
+     <property name="text">
+      <string>Put generated Table of Contents at &amp;start of book instead of end</string>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="0">
+    <widget class="QCheckBox" name="opt_mobi_ignore_margins">
+     <property name="text">
+      <string>Ignore &amp;margins</string>
+     </property>
+    </widget>
+   </item>
+   <item row="7" column="0" colspan="2">
+    <widget class="QGroupBox" name="groupBox">
+     <property name="title">
+      <string>Kindle options</string>
+     </property>
+     <layout class="QVBoxLayout" name="verticalLayout">
+      <item>
+       <layout class="QHBoxLayout" name="horizontalLayout">
+        <item>
+         <widget class="QLabel" name="label_3">
+          <property name="text">
+           <string>Personal Doc tag:</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QLineEdit" name="opt_personal_doc"/>
+        </item>
+       </layout>
+      </item>
+      <item>
+       <widget class="QCheckBox" name="opt_share_not_sync">
+        <property name="text">
+         <string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
+        </property>
+       </widget>
+      </item>
+      <item>
+       <spacer name="verticalSpacer">
+        <property name="orientation">
+         <enum>Qt::Vertical</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>20</width>
+          <height>40</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
+     </layout>
+    </widget>
+   </item>
+   <item row="8" column="0">
+    <spacer name="verticalSpacer_2">
+     <property name="orientation">
+      <enum>Qt::Vertical</enum>
+     </property>
+     <property name="sizeHint" stdset="0">
+      <size>
+       <width>20</width>
+       <height>40</height>
+      </size>
+     </property>
+    </spacer>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>
src/calibre/gui2/store/stores/biblio_plugin.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2012, Alex Stanev <alex@stanev.org>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from calibre.gui2.store.basic_config import BasicStoreConfig
+from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
+from calibre.gui2.store.search_result import SearchResult
+
+class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore):
+
+    open_search_url = 'http://biblio.bg/feed.opds.php'
+    web_url = 'http://biblio.bg/'
+
+    def search(self, query, max_results=10, timeout=60):
+        # check for cyrillic symbols before performing search
+        uquery = unicode(query.strip(), 'utf-8')
+        reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
+        if not reObj:
+            return
+
+        for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
+            yield s
+
+    def get_details(self, search_result, timeout):
+        # get format and DRM status
+        from calibre import browser
+        from contextlib import closing
+        from lxml import html
+
+        br = browser()
+        with closing(br.open(search_result.detail_item, timeout=timeout)) as nf:
+            idata = html.fromstring(nf.read())
+            search_result.formats = ''
+            if idata.xpath('.//span[@class="format epub"]'):
+                search_result.formats = 'EPUB'
+
+            if idata.xpath('.//span[@class="format pdf"]'):
+                if search_result.formats == '':
+                    search_result.formats = 'PDF'
+                else:
+                    search_result.formats.join(', PDF')
+
+            if idata.xpath('.//span[@class="format nodrm-icon"]'):
+                search_result.drm = SearchResult.DRM_UNLOCKED
+            else:
+                search_result.drm = SearchResult.DRM_LOCKED
+
+        return True
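Note: one nit in the new store plugin: search_result.formats.join(', PDF') is a no-op, since str.join builds a new string from an iterable and the result here is discarded. The presumable intent, as a hypothetical correction:

    # Append PDF to an existing formats string instead of discarding it
    if idata.xpath('.//span[@class="format pdf"]'):
        if search_result.formats:
            search_result.formats += ', PDF'
        else:
            search_result.formats = 'PDF'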
@@ -22,7 +22,7 @@ It can convert every input format in the following list, to every output format.

 *Input Formats:* CBZ, CBR, CBC, CHM, DJVU, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ

-*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ
+*Output Formats:* AZW3, EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ

 .. note ::

@@ -35,7 +35,7 @@ It can convert every input format in the following list, to every output format.

 What are the best source formats to convert?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In order of decreasing preference: LIT, MOBI, EPUB, FB2, HTML, PRC, RTF, PDB, TXT, PDF
+In order of decreasing preference: LIT, MOBI, AZW, EPUB, AZW3, FB2, HTML, PRC, RTF, PDB, TXT, PDF

 I converted a PDF file, but the result has various problems?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -245,7 +245,7 @@ The following functions are available in addition to those described in single-f
 * ``current_library_name() -- `` return the last name on the path to the current calibre library. This function can be called in template program mode using the template ``{:'current_library_name()'}``.
 * ``days_between(date1, date2)`` -- return the number of days between ``date1`` and ``date2``. The number is positive if ``date1`` is greater than ``date2``, otherwise negative. If either ``date1`` or ``date2`` are not dates, the function returns the empty string.
 * ``divide(x, y)`` -- returns x / y. Throws an exception if either x or y are not numbers.
-* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables.
+* ``eval(string)`` -- evaluates the string as a program, passing the local variables (those ``assign`` ed to). This permits using the template processor to construct complex results from local variables. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the `}` character; they are converted automatically. Note also that prefixes and suffixes (the "|prefix|suffix" syntax) cannot be used in the argument to this function when using template program mode.
 * ``field(name)`` -- returns the metadata field named by ``name``.
 * ``first_non_empty(value, value, ...)`` -- returns the first value that is not empty. If all values are empty, then the empty value is returned. You can have as many values as you want.
 * ``format_date(x, date_format)`` -- format_date(val, format_string) -- format the value, which must be a date field, using the format_string, returning a string. The formatting codes are::
@@ -306,7 +306,7 @@ The following functions are available in addition to those described in single-f
 * ``substr(str, start, end)`` -- returns the ``start``'th through the ``end``'th characters of ``str``. The first character in ``str`` is the zero'th character. If end is negative, then it indicates that many characters counting from the right. If end is zero, then it indicates the last character. For example, ``substr('12345', 1, 0)`` returns ``'2345'``, and ``substr('12345', 1, -1)`` returns ``'234'``.
 * ``subtract(x, y)`` -- returns x - y. Throws an exception if either x or y are not numbers.
 * ``today()`` -- return a date string for today. This value is designed for use in format_date or days_between, but can be manipulated like any other string. The date is in ISO format.
-* ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the `}` character; they are converted automatically. For example, ``template('[[title_sort]]')`` will evaluate the template ``{title_sort}`` and return its value.
+* ``template(x)`` -- evaluates x as a template. The evaluation is done in its own context, meaning that variables are not shared between the caller and the template evaluation. Because the `{` and `}` characters are special, you must use `[[` for the `{` character and `]]` for the `}` character; they are converted automatically. For example, ``template('[[title_sort]]')`` will evaluate the template ``{title_sort}`` and return its value. Note also that prefixes and suffixes (the "|prefix|suffix" syntax) cannot be used in the argument to this function when using template program mode.

 .. _template_functions_reference:
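Note: a sketch of the escaping rule the new doc text describes, in program mode (untested and illustrative only):

    program: assign(a, field('title')); eval('Title is [[a]]')

The [[ and ]] pairs are converted to { and } before eval() runs, so the inner template sees {a} and resolves it against the local variable assigned on the first statement rather than against book metadata.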
@@ -387,7 +387,7 @@ def _prefs():
             help=_('The language in which to display the user interface'))
     c.add_opt('output_format', default='EPUB',
             help=_('The default output format for ebook conversions.'))
-    c.add_opt('input_format_order', default=['EPUB', 'MOBI', 'LIT', 'PRC',
+    c.add_opt('input_format_order', default=['EPUB', 'AZW3', 'MOBI', 'LIT', 'PRC',
         'FB2', 'HTML', 'HTM', 'XHTM', 'SHTML', 'XHTML', 'ZIP', 'ODT', 'RTF', 'PDF',
         'TXT'],
             help=_('Ordered list of formats to prefer for input.'))
@@ -217,7 +217,9 @@ class BuiltinTemplate(BuiltinFormatterFunction):
             'characters are special, you must use [[ for the { character and '
             ']] for the } character; they are converted automatically. '
             'For example, template(\'[[title_sort]]\') will evaluate the '
-            'template {title_sort} and return its value.')
+            'template {title_sort} and return its value. Note also that '
+            'prefixes and suffixes (the "|prefix|suffix" syntax) cannot be '
+            'used in the argument to this function when using template program mode.')

     def evaluate(self, formatter, kwargs, mi, locals, template):
         template = template.replace('[[', '{').replace(']]', '}')
@@ -230,7 +232,12 @@ class BuiltinEval(BuiltinFormatterFunction):
     __doc__ = doc = _('eval(template) -- evaluates the template, passing the local '
             'variables (those \'assign\'ed to) instead of the book metadata. '
             ' This permits using the template processor to construct complex '
-            'results from local variables.')
+            'results from local variables. Because the { and } '
+            'characters are special, you must use [[ for the { character and '
+            ']] for the } character; they are converted automatically. '
+            'Note also that prefixes and suffixes (the "|prefix|suffix" syntax) '
+            'cannot be used in the argument to this function when using '
+            'template program mode.')

     def evaluate(self, formatter, kwargs, mi, locals, template):
         from formatter import EvalFormatter