mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Sync to trunk.
This commit is contained in:
commit
91b181e5a9
173
Changelog.yaml
173
Changelog.yaml
@ -19,6 +19,179 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.8.40
|
||||
date: 2012-02-17
|
||||
|
||||
new features:
|
||||
- title: "Amazon metadata download: Support the new 'Book Description' section that Amazon publishes for some books. Also workaround the amazon US servers occasionally returning broken markup leading to calibre not finding any matches for books on Amazon."
|
||||
|
||||
- title: "Kindle driver: Add an option to allow using page counts stored in a custom column. Go to Preferences->Plugins and customize the Kindle driver, to tell it to use a custom column to get page count data. See http://www.mobileread.com/forums/showpost.php?p=1963075&postcount=215 for details."
|
||||
|
||||
- title: "Template language: Add a current_library_name() function that can be used to return the name of the currently opened library in calibre"
|
||||
|
||||
- title: "Driver for Xperia Neo and PocketBook A10"
|
||||
tickets: [930788]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix regression in 0.8.36 that caused the calibredb command to not properly refresh format information in standalone calibre-server processes"
|
||||
|
||||
- title: "Fix regression in 0.8.39 that broke getting covers from some epub files on OS X."
|
||||
tickets: [932507]
|
||||
|
||||
- title: "Reading metadata from HTML files: Do not take a very long time for very large HTML files. Also fix reading metadata from meta tags with multiple spaces before the content attribute."
|
||||
tickets: [932262]
|
||||
|
||||
- title: "EPUB Output: Fix splitting breaking internal links in the epub, if the links pointed to files with URL unsafe characters in their file names."
|
||||
tickets: [929966]
|
||||
|
||||
- title: "Fix auto adding not leaving languages field blank when book has no defined language"
|
||||
tickets: [930648]
|
||||
|
||||
improved recipes:
|
||||
- Samanyolu Haber
|
||||
- Kurier
|
||||
- Le devoir
|
||||
- Daily Mirror
|
||||
- Common Dreams
|
||||
- Pescanik
|
||||
|
||||
new recipes:
|
||||
- title: Asian Review of Books
|
||||
author: Darko Miletic
|
||||
|
||||
- title: Albert Mohler, Desiring God, Living Stones and Resurgence
|
||||
author: Peter Grungi
|
||||
|
||||
- title: Novinite BG
|
||||
author: M3 Web
|
||||
|
||||
- title: Catholic Daily Readings
|
||||
author: adoucette
|
||||
|
||||
- title: Consortium News and Microwave and RF magazine
|
||||
author: kiavash
|
||||
|
||||
- version: 0.8.39
|
||||
date: 2012-02-10
|
||||
|
||||
new features:
|
||||
- title: "Auto-adding: Add an option to check for duplicates when auto adding."
|
||||
tickets: [926962]
|
||||
|
||||
- title: "Content server: Export a second record via mDNS that points to the full OPDS feed in addition to the one pointing to the Stanza feed. The new record is of type _calibre._tcp."
|
||||
tickets: [929304]
|
||||
|
||||
- title: "Allow specifying a set of categories that are not partitioned even if they contain a large number of items in the Tag Browser. Preference is available under Look & Feel->Tag Browser"
|
||||
|
||||
- title: "Allow setting a URL prefix for the content server that runs embedded in the calibre GUI as well."
|
||||
tickets: [928905]
|
||||
|
||||
- title: "Allow output of identifiers data in CSV/XML/BiBTeX catalogs"
|
||||
tickets: [927737]
|
||||
|
||||
- title: "Driver for Motorola Droid XT910, Nokia E71 and HTC EVO 3D."
|
||||
tickets: [928202, 927818, 929400]
|
||||
|
||||
- title: "Cut down the time taken to launch worker processes by 40%"
|
||||
|
||||
- title: "You can now configure the calibre settings for the currently connected device by right clicking on the device icon in the toolbar, instead of having to go through Preferences->Plugins"
|
||||
|
||||
bug fixes:
|
||||
- title: "Auto-adding: Do not add incomplete files when files are downloaded directly into the auto add folder."
|
||||
tickets: [926578]
|
||||
|
||||
- title: "When running multiple delete from device jobs, fix the device view sometimes marking the wrong books as being deleted, after the first delete job completes."
|
||||
tickets: [927972]
|
||||
|
||||
- title: "MOBI Input: Handle files that have spurious closing </body> and/or </html> tags in their markup."
|
||||
tickets: [925833]
|
||||
|
||||
- title: "RTF Input: Strip out false color specifications, as they cause artifacts when converted to MOBI"
|
||||
|
||||
improved recipes:
|
||||
- Updated Postmedia publications
|
||||
- Foreign Affairs
|
||||
- Read It Later
|
||||
- Microwave Journal
|
||||
- tagesschau.de
|
||||
|
||||
new recipes:
|
||||
- title: Vancouver Province and Windsor Star
|
||||
author: Nick Redding
|
||||
|
||||
- title: Onda Rock
|
||||
author: faber1971
|
||||
|
||||
- title: Il Manifesto
|
||||
author: Giacomo Lacava
|
||||
|
||||
- version: 0.8.38
|
||||
date: 2012-02-03
|
||||
|
||||
new features:
|
||||
- title: "Implement the ability to automatically add books to calibre from a specified folder."
|
||||
type: major
|
||||
description: "calibre can now watch a folder on your computer and instantly add any files you put there to the calibre library as new books. You can tell calibre which folder to watch via Preferences->Adding Books->Automatic Adding."
|
||||
tickets: [920249]
|
||||
|
||||
- title: "Conversion: When automatically inserting page breaks, do not put a page break before a <h1> or <h2> tag if it is immediately preceded by another <h1> or <h2> tag."
|
||||
|
||||
- title: "Driver for EZReader T730 and Point-of-View PlayTab Pro"
|
||||
tickets: [923283, 922969]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix device entry not visible in menubar even when it has been added via Preferences->Toolbars."
|
||||
tickets: [923175]
|
||||
|
||||
- title: "Fix metadata plugboards not applied when auto sending news by email"
|
||||
|
||||
- title: "Fix regression in 0.8.34 that broke recipes that used skip_ad_pages() but not get_browser(). "
|
||||
tickets: [923724]
|
||||
|
||||
- title: "Restore device support on FreeBSD, by using HAL"
|
||||
tickets: [924503]
|
||||
|
||||
- title: "Get books: Show no more than 10 results from the Gandalf store"
|
||||
|
||||
- title: "Content server: Fix metadata not being updated when sending for some MOBI files."
|
||||
tickets: [923130]
|
||||
|
||||
- title: "Heuristic processing: Fix the italicize common patterns algorithm breaking on some HTML markup."
|
||||
tickets: [922317]
|
||||
|
||||
- title: "When trying to find an ebook inside a zip file, do not fail if the zip file itself contains other zip files."
|
||||
tickets: [925670]
|
||||
|
||||
- title: "EPUB Input: Handle EPUBs with duplicate entries in the manifest."
|
||||
tickets: [925831]
|
||||
|
||||
- title: "MOBI Input: Handle files that have extra </html> tags sprinkled throughout their markup."
|
||||
tickets: [925833]
|
||||
|
||||
improved recipes:
|
||||
- Metro Nieuws NL
|
||||
- FHM UK
|
||||
|
||||
new recipes:
|
||||
- title: Strange Horizons
|
||||
author: Jim DeVona
|
||||
|
||||
- title: Telegraph India and Live Mint
|
||||
author: Krittika Goyal
|
||||
|
||||
- title: High Country News
|
||||
author: Armin Geller
|
||||
|
||||
- title: Countryfile
|
||||
author: Dave Asbury
|
||||
|
||||
- title: Liberation (subscription version)
|
||||
author: Remi Vanicat
|
||||
|
||||
- title: Various Italian news sources
|
||||
author: faber1971
|
||||
|
||||
|
||||
- version: 0.8.37
|
||||
date: 2012-01-27
|
||||
|
||||
|
18
recipes/albert_mohler.recipe
Normal file
18
recipes/albert_mohler.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AlbertMohlersBlog(BasicNewsRecipe):
    """Recipe that builds a periodical from the Albert Mohler's Blog feed."""

    # Identification / metadata.  ``language`` used to be assigned twice with
    # the same value; it is now set exactly once.
    title = u'Albert Mohler\'s Blog'
    __author__ = 'Peter Grungi'
    language = 'en'
    publisher = 'Albert Mohler'
    author = 'Albert Mohler'
    cover_url = 'http://www.albertmohler.com/wp-content/themes/albert-mohler-v5/img/logo-am-lg.gif'

    # Download limits (see the BasicNewsRecipe documentation for the units).
    oldest_article = 90
    max_articles_per_feed = 10

    # Rely on calibre's automatic clean-up instead of hand-written tag rules.
    auto_cleanup = True

    # (feed title, feed URL) pairs.
    feeds = [(u'Albert Mohler\'s Blog', u'http://feeds.feedburner.com/AlbertMohlersBlog?format=xml')]
|
51
recipes/asianreviewofbooks.recipe
Normal file
51
recipes/asianreviewofbooks.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.asianreviewofbooks.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AsianReviewOfBooks(BasicNewsRecipe):
    """Recipe for The Asian Review of Books (www.asianreviewofbooks.com)."""

    title = 'The Asian Review of Books'
    __author__ = 'Darko Miletic'
    description = 'In addition to reviewing books about or of relevance to Asia, the Asian Review of Books also features long-format essays by leading Asian writers and thinkers, to providing an unparalleled forum for discussion of key contemporary issues by Asians for Asia and a vehicle of intellectual depth and breadth where leading thinkers can write on the books, arts and ideas of the day. Widely quoted and referenced, with an archive of more than one thousand book reviews, it is the only web resource dedicated to Asian books. And now, with the addition of the new premium content, the Asian Review of Books, is a must-read publication.'
    publisher = 'The Asian Review of Books'
    category = 'literature, books, reviews, Asia'
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    language = 'en_HK'
    publication_type = 'magazine'
    masthead_url = 'http://www.asianreviewofbooks.com/new/images/mob_arb.png'
    extra_css = """
                body{font-family: serif}
                .big {font-size: xx-large}
                .bold {font-weight: bold}
                .italic {font-style: italic}
                .small {font-size: small}
                img {display: block}
                """

    # Metadata handed to the conversion pipeline; reuses the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    remove_tags = [dict(name=['object', 'script', 'iframe', 'embed'])]
    remove_attributes = ['style', 'onclick']
    feeds = [(u'Articles', u'http://www.asianreviewofbooks.com/new/rss.php')]

    def print_version(self, url):
        """Turn a feed link ending in ``?ID=<id>`` into the ``getarticle.php`` URL.

        :param url: article link taken from the feed.
        :return: ``<prefix>getarticle.php?articleID=<id>&stats=web`` where
                 ``<prefix>`` is everything before the last ``?ID=``.  A link
                 without ``?ID=`` is returned unchanged; previously it was
                 turned into a meaningless relative URL because
                 ``rpartition`` yields an empty prefix in that case.
        """
        root, sep, artid = url.rpartition('?ID=')
        if not sep:
            # No article id marker to rewrite - keep the original link.
            return url
        return root + 'getarticle.php?articleID=' + artid + '&stats=web'

    def preprocess_raw_html(self, raw, url):
        """Wrap the downloaded markup in a minimal html/head/body skeleton.

        :param raw: markup as downloaded.
        :param url: address the markup came from (unused).
        :return: ``raw`` embedded in a complete HTML document.
        """
        return '<html><head><title>title</title></head><body>' + raw + '</body></html>'
|
||||
|
@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
borba.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Borba(BasicNewsRecipe):
    """Recipe for Borba Online (borba.rs); articles are scraped from section pages."""

    title = 'Borba Online'
    __author__ = 'Darko Miletic'
    description = 'Dnevne novine Borba Online'
    publisher = 'IP Novine Borba'
    category = 'news, politics, Serbia'
    language = 'sr'

    lang = _('sr-Latn-RS')
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
    INDEX = u'http://www.borba.rs/'
    extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': lang,
        'pretty_print': True,
    }

    # Swap U+0110 for U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class':'main'})]

    remove_tags_after = dict(name='div',attrs={'id':'written_comments_title'})

    remove_tags = [
        dict(name=['object','link','iframe','base','img']),
        dict(name='div',attrs={'id':'written_comments_title'}),
    ]

    # (section title, section page URL); these are HTML pages, not RSS feeds,
    # and are scraped by parse_index() below.
    feeds = [
        (u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/'),
        (u'Prvi plan', u'http://www.borba.rs/content/blogsection/4/92/'),
        (u'Dogadjaji', u'http://www.borba.rs/content/blogsection/21/83/'),
        (u'Ekonomija', u'http://www.borba.rs/content/blogsection/5/35/'),
        (u'Komentari', u'http://www.borba.rs/content/blogsection/23/94/'),
        (u'Svet', u'http://www.borba.rs/content/blogsection/7/36/'),
        (u'Sport', u'http://www.borba.rs/content/blogsection/6/37/'),
        (u'Fama', u'http://www.borba.rs/content/blogsection/25/89/'),
        (u'B2 Dodatak', u'http://www.borba.rs/content/blogsection/30/116/'),
    ]

    def preprocess_html(self, soup):
        """Rename every table-related tag to ``div`` and drop its layout attributes.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        """
        table_tags = ['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']
        layout_attrs = [
            'style', 'font', 'valign',
            'colspan', 'width', 'height',
            'rowspan', 'summary', 'align',
            'cellspacing', 'cellpadding',
            'frames', 'rules', 'border',
        ]
        for node in soup.body.findAll(name=table_tags):
            node.name = 'div'
            for key in layout_attrs:
                if node.has_key(key):
                    del node[key]
        return soup

    def parse_index(self):
        """Build the article index by scraping each section page.

        Every ``<a class="contentpagetitle">`` on a section page becomes one
        article entry with an empty date and description.

        :return: list of ``(section title, [article dict, ...])`` tuples.
        """
        sections = []
        for section_title, section_url in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(section_title or section_url))
            page = self.index_to_soup(section_url)
            entries = []
            for link in page.findAll('a', attrs={'class':'contentpagetitle'}):
                href = link['href']
                entries.append({
                    'title': self.tag_to_string(link),
                    'date': '',
                    'url': href,
                    'description': '',
                })
            sections.append((section_title, entries))
        return sections
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,45 +7,76 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Calgary Herald
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
title = u'Calgary Herald'
|
||||
url_prefix = 'http://www.calgaryherald.com'
|
||||
description = u'News from Calgary, AB'
|
||||
fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following three lines for the Regina Leader-Post
|
||||
#title = u'Regina Leader-Post'
|
||||
#url_prefix = 'http://www.leaderpost.com'
|
||||
#description = u'News from Regina, SK'
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
#title = u'Saskatoon Star-Phoenix'
|
||||
#url_prefix = 'http://www.thestarphoenix.com'
|
||||
#description = u'News from Saskatoon, SK'
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
__author__ = 'Nick Redding'
|
||||
encoding = 'latin1'
|
||||
no_stylesheets = True
|
||||
timefmt = ' [%b %d]'
|
||||
extra_css = '''
|
||||
@ -64,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self, soup):
    """Delete empty ``id`` attributes from every ``<div>`` in ``soup``.

    :param soup: parsed article document.
    :return: the same ``soup`` object, modified in place.  The previous
             version fell off the end and returned ``None``.
    """
    # Delete empty id attributes -- they screw up the TOC for unknown reasons.
    for div in soup.findAll('div', attrs={'id': ''}):
        del div['id']
    return soup
|
||||
def get_cover_url(self):
    """Return the URL of the most recent front-page image that can be opened.

    The URL is built from the day of the month and ``self.fp_tag``.  Today's
    image is tried first, then each of the six preceding days, using a fresh
    browser for every attempt.

    :return: the first URL that opens, or ``None`` when ``self.fp_tag`` is
             empty or none of the seven candidates can be opened.
    """
    from datetime import timedelta, date
    if self.fp_tag == '':
        return None
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit.  Any ordinary failure means "try an older day".
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self, string):
    """Replace cp1252 punctuation code points with real typographic characters.

    Code points 0x91-0x94, 0x96 and 0x97 are mapped to the matching curly
    quotes and dashes, and the literal numeric entity for a right single
    quote is decoded.  In the text under review that last substitution
    replaced a curly quote with itself because the entity in its pattern
    had been decoded; the entity is restored here so the call does something.

    :param string: text to clean up.
    :return: the cleaned-up text.
    """
    replacements = (
        ("\x91", "‘"),        # lsquo
        ("\x92", "’"),        # rsquo
        ("\x93", "“"),        # ldquo
        ("\x94", "”"),        # rdquo
        ("\x96", "–"),        # ndash
        ("\x97", "—"),        # mdash
        ("&#x2019;", "’"),    # rsquo left behind as a numeric entity
    )
    fixed = string
    for pattern, glyph in replacements:
        fixed = re.sub(pattern, glyph, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Clean a TOC description so that it renders on the Kindle.

    HTML entities are converted via ``BeautifulStoneSoup``, any remaining
    ``&amp;`` is turned into ``&`` and the result is passed through
    ``self.fixChars``.  The ``&amp;`` pattern had been decoded to a bare
    ``&`` in the text under review, turning the substitution into a no-op;
    it is restored here.

    :param description: description text; may be empty or ``None``.
    :return: the cleaned text, or ``description`` itself when it is falsy.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # Replace '&amp;' with '&'
    massaged = re.sub("&amp;", "&", massaged)
    return self.fixChars(massaged)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and, if needed, a summary to ``article``.

    When ``first`` is true, the first ``<img>`` inside ``<body>`` (if any) is
    registered as the TOC thumbnail after removing ``links\\link<N>\\``
    fragments from its ``src``.  Independently of ``first``, an article whose
    ``text_summary`` is blank receives the content of the page's
    ``og:description`` meta tag as both ``summary`` and ``text_summary``.

    :param article: article object being populated.
    :param soup: parsed article document.
    :param first: flag passed in by the caller; gates the thumbnail step.
    """
    if first:
        thumb = soup.find('body').find('img')
        if thumb is not None:
            thumb_src = re.sub(r'links\\link\d+\\', '', thumb['src'])
            self.add_toc_thumbnail(article, thumb_src)
    if article.text_summary.strip():
        # A summary is already present - nothing to fill in.
        return
    og_desc = soup.find('meta', attrs={'property': 'og:description'})
    if og_desc is None:
        return
    article.summary = article.text_summary = og_desc['content']
|
||||
|
||||
def strip_anchors(self, soup):
    """Replace every image-less ``<a>`` tag with its rendered contents.

    Anchors that wrap an ``<img>`` are left alone.  The rendered contents are
    decoded as cp1252 (undecodable bytes replaced) before substitution.

    :param soup: parsed article document.
    :return: the same ``soup`` object, modified in place.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            inner = anchor.renderContents().decode('cp1252', 'replace')
            anchor.replaceWith(inner)
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Run ``strip_anchors`` over the article and return its result.

    :param soup: parsed article document.
    :return: whatever ``self.strip_anchors(soup)`` returns.
    """
    stripped = self.strip_anchors(soup)
    return stripped
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
@ -98,9 +196,7 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
atag = h1tag.find('a',href=True)
|
||||
if not atag:
|
||||
continue
|
||||
url = atag['href']
|
||||
if not url.startswith('http:'):
|
||||
url = self.url_prefix+'/news/todays-paper/'+atag['href']
|
||||
url = self.url_prefix+'/news/todays-paper/'+atag['href']
|
||||
#self.log("Section %s" % key)
|
||||
#self.log("url %s" % url)
|
||||
title = self.tag_to_string(atag,False)
|
||||
|
11
recipes/catholic_daily_readings.recipe
Normal file
11
recipes/catholic_daily_readings.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1328971305(BasicNewsRecipe):
    """Recipe that combines several Catholic daily-reading feeds."""

    title = u'Catholic Daily Readings'
    __author__ = 'adoucette'
    language = 'en'

    # Download limits (see the BasicNewsRecipe documentation for the units).
    oldest_article = 7
    max_articles_per_feed = 100

    # Rely on calibre's automatic clean-up instead of hand-written tag rules.
    auto_cleanup = True

    # (feed title, feed URL) pairs, one per source.
    feeds = [
        (u'Daily Readings - USCCB', u'http://www.usccb.org/bible/readings/rss/'),
        (u'Daily Reflection - One Bread One Body', u'http://www.presentationministries.com/general/rss.asp'),
        (u'Mass Readings - Universalis', u'http://www.universalis.com/atommass3.xml'),
        (u'Saint Of The Day - CNA', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday'),
    ]
|
@ -77,8 +77,18 @@ class ChicagoTribune(BasicNewsRecipe):
|
||||
|
||||
|
||||
def get_article_url(self, article):
    """Pick the article URL from a feed entry and drop the RSS tracking suffix.

    The first available of ``feedburner_origlink``, ``guid`` and ``link`` is
    used.  A URL ending in ``?track=rss`` is cut at its first ``?``.

    The text under review printed the URL for debugging and returned
    immediately, which left the tracking-suffix handling unreachable; both
    leftovers are removed.  A missing URL is now returned as ``None``
    instead of raising ``AttributeError``.

    :param article: feed entry exposing a dict-style ``get``.
    :return: the cleaned URL, or ``None`` when the entry carries no URL.
    """
    url = article.get('feedburner_origlink', article.get('guid', article.get('link')))
    if url and url.endswith('?track=rss'):
        url = url.partition('?')[0]
    return url
|
||||
|
||||
def skip_ad_pages(self, soup):
    """Follow the "click here to continue to article" link, if present.

    :param soup: parsed page that may be an interstitial.
    :return: the result of ``self.index_to_soup(href, raw=True)`` for the
             parent element of the marker text, or ``None`` when the marker
             text is absent or its parent has no ``href``.
    """
    marker = soup.find(text='click here to continue to article')
    if not marker:
        return None
    target = marker.parent.get('href')
    if not target:
        return None
    return self.index_to_soup(target, raw=True)
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
# Remove the navigation bar. It was kept until now to be able to follow
|
||||
|
@ -1,38 +1,89 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Title: Common Dreams
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
# Feb 2012: Cleaned up the output to have only the main article
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
'''
|
||||
commondreams.org
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CommonDreams(BasicNewsRecipe):
    """Recipe for Common Dreams (commondreams.org), fetched via the print view.

    ``description``, ``extra_css`` and ``feeds`` were each assigned twice, so
    the first value of each was dead; only the effective (last) assignment
    is kept.
    """

    # Identify the recipe
    title = u'Common Dreams'
    description = u'Breaking News & Views for the Progressive Community.'
    cover_url = 'https://s3.amazonaws.com/s3.commondreams.org/images/common-dreams.png'
    __author__ = u'XanthanGum'
    language = 'en'

    # Pick no article older than seven days and limit the number of articles per feed to 100
    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True

    # Flattens all the tables to make it compatible with Nook
    conversion_options = {'linearize_tables' : True}

    # Remove everything before the article
    remove_tags_before = dict(name = 'div', attrs = {'id':'node-header'})

    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]

    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
                 .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
                 font-size: 80%; font-style: italic; margin: 1px auto; } \
                 .story-date, .published { font-size: 80%; } \
                 table { width: 100%; } \
                 td img { display: block; margin: 5px auto; } \
                 ul { padding-top: 10px; } \
                 ol { padding-top: 10px; } \
                 li { padding-top: 5px; padding-bottom: 5px; } \
                 h1 { font-size: 175%; font-weight: bold; } \
                 h2 { font-size: 150%; font-weight: bold; } \
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'

    # Remove the line breaks and float left/right and picture width/height.
    # The "<br clear...>" pattern is non-greedy: the greedy form could swallow
    # everything up to the LAST "/>" on the same line, deleting real content.
    preprocess_regexps = [
        (re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
        (re.compile(r'<br[ ]*clear.*?/>', re.IGNORECASE), lambda m: ''),
        (re.compile(r'float:.*?'), lambda m: ''),
        (re.compile(r'width:.*?px'), lambda m: ''),
        (re.compile(r'height:.*?px'), lambda m: ''),
        (re.compile(r'<a.*?>'), lambda m: ''),
        (re.compile(r'</a>'), lambda m: ''),
    ]

    # Remove everything after the article
    remove_tags_after = dict(name = 'div', attrs = {'class':'copyright-info'})

    # Main article is inside this tag
    keep_only_tags = [
        dict(name='div', attrs={'id':lambda x: x and 'node-' in x}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'node-links clear-block'}),  # remove Share options
    ]

    # Identify the news feeds
    feeds = [
        (u'Headlines', u'https://www.commondreams.org/feed/headlines_rss'),
        (u'Further News Articles', u'https://www.commondreams.org/feed/further_rss'),
        (u'Views', u'https://www.commondreams.org/feed/views_rss'),
        (u'Progressive Newswire', u'https://www.commondreams.org/feed/newswire_rss'),
    ]

    def print_version(self, url):
        """Return the print-view address of an article.

        :param url: article URL taken from the feed.
        :return: ``url`` with ``?print`` appended.
        """
        return url + '?print'
|
71
recipes/consortium_news.recipe
Normal file
71
recipes/consortium_news.recipe
Normal file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Title: Consortium News
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
# Feb 2012: Initial release
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
'''
|
||||
consortiumnews.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ConsortiumNews(BasicNewsRecipe):
    """Recipe for Consortium News (consortiumnews.com) via its FeedBurner feed."""

    title = u'Consortium News'
    publisher = 'Copyright © 2012 Consortiumnews. All Rights Reserved.'
    language = 'en'
    __author__ = 'kiavash'

    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True

    # Flattens all the tables to make it compatible with Nook
    conversion_options = {'linearize_tables' : True}

    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]

    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
                 .cap, .caption { display: block; font-size: 80%; font-style: italic; } \
                 .cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
                 .byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
                 .correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
                 font-size: 80%; font-style: italic; margin: 1px auto; } \
                 .story-date, .published { font-size: 80%; } \
                 table { width: 100%; } \
                 td img { display: block; margin: 5px auto; } \
                 ul { padding-top: 10px; } \
                 ol { padding-top: 10px; } \
                 li { padding-top: 5px; padding-bottom: 5px; } \
                 h1 { font-size: 175%; font-weight: bold; } \
                 h2 { font-size: 150%; font-weight: bold; } \
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'

    # Remove the line breaks and float left/right and picture width/height.
    # The "<br clear...>" pattern is non-greedy: the greedy form could swallow
    # everything up to the LAST "/>" on the same line, deleting real content.
    # Each callable receives the match object (previously misnamed h1/h2).
    preprocess_regexps = [
        (re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
        (re.compile(r'<br[ ]*clear.*?/>', re.IGNORECASE), lambda m: ''),
        (re.compile(r'float:.*?'), lambda m: ''),
        (re.compile(r'width:.*?px'), lambda m: ''),
        (re.compile(r'height:.*?px'), lambda m: ''),
        (re.compile(r'<a.*?>'), lambda m: ''),
        (re.compile(r'</a>'), lambda m: ''),
    ]

    # Main article is inside this tag
    keep_only_tags = [dict(name='div', attrs={'id':lambda x: x and 'post-' in x})]

    remove_tags = [
        dict(name='div', attrs={'class':'sociable'}),  # remove 'Share this Article'
        dict(name='p', attrs={'class':'tags'}),        # remove 'Tags: ... '
    ]

    feeds = [(u'Consortium News', u'http://feeds.feedburner.com/Consortiumnewscom')]
|
@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
description = 'News as provide by The Daily Mirror -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 26/12/11
|
||||
# last updated 11/2/12
|
||||
language = 'en_GB'
|
||||
|
||||
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
||||
@ -14,35 +14,58 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 20
|
||||
max_articles_per_feed = 5
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
#conversion_options = { 'linearize_tables' : True }
|
||||
|
||||
|
||||
#keep_only_tags = [
|
||||
# dict(name='h1'),
|
||||
# dict(name='div',attrs={'id' : 'body-content'}),
|
||||
#dict(name='div',atts={'class' : 'article-body'}),
|
||||
#dict(attrs={'class' : ['article-attr','byline append-1','published']}),
|
||||
#dict(name='p'),
|
||||
# ]
|
||||
|
||||
#remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='title'),
|
||||
dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
|
||||
# dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
|
||||
#dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
|
||||
#dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
|
||||
]
|
||||
|
||||
# preprocess_regexps = [
|
||||
#(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
#preprocess_regexps = [
|
||||
#(re.compile(r'Sponsored Links', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
feeds = [
|
||||
|
||||
(u'News', u'http://www.mirror.co.uk/news/rss.xml')
|
||||
,(u'Tech News', u'http://www.mirror.co.uk/news/technology/rss.xml')
|
||||
,(u'Weird World','http://www.mirror.co.uk/news/weird-world/rss.xml')
|
||||
,(u'Film Gossip','http://www.mirror.co.uk/celebs/film/rss.xml')
|
||||
,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
|
||||
,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
|
||||
,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
|
||||
,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
|
||||
,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
|
||||
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||
(u'UK News', u'http://feed43.com/0287771688643868.xml')
|
||||
,(u'Tech News', u'http://feed43.com/2455520588350501.xml')
|
||||
,(u'Weird World','http://feed43.com/0863800333634654.xml')
|
||||
,(u'Sport','http://feed43.com/7713243036546130.xml')
|
||||
,(u'Sport : Boxing ','http://feed43.com/0414732220804255.xml')
|
||||
,(u'Sport : Rugby Union','http://feed43.com/4710138762362383.xml')
|
||||
,(u'Sport : Other','http://feed43.com/4501416886323415.xml')
|
||||
,(u'TV and Film','http://feed43.com/5238302853765104.xml')
|
||||
,(u'Celebs','http://feed43.com/8770061048844683.xml')
|
||||
,(u'Life Style : Family','http://feed43.com/4356170742410338.xml')
|
||||
,(u'Travel','http://feed43.com/1436576006476607.xml')
|
||||
|
||||
|
||||
|
||||
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||
]
|
||||
|
21
recipes/desiring_god.recipe
Normal file
21
recipes/desiring_god.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class DesiringGodEnglish(BasicNewsRecipe):
    """Recipe that downloads the Desiring God blog from its Feedburner feed."""

    title = u'Desiring God'
    __author__ = 'Peter Grungi'
    # Declared once; the original assigned the same value a second time below.
    language = 'en'

    # The same image is used for both the cover and the masthead.
    cover_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png'
    masthead_url = 'http://cdn0.desiringgod.org/images/layout/breadcrumbs_dg_mark.png'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    publisher = 'Desiring God Ministries'
    author = 'Desiring God Ministries'

    feeds = [(u'Desiring God Blog', u'http://feeds.feedburner.com/DGBlog?format=xml')]
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,45 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Edmonton Journal
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
title = u'Edmonton Journal'
|
||||
url_prefix = 'http://www.edmontonjournal.com'
|
||||
description = u'News from Edmonton, AB'
|
||||
fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following three lines for the Calgary Herald
|
||||
#title = u'Calgary Herald'
|
||||
#url_prefix = 'http://www.calgaryherald.com'
|
||||
#description = u'News from Calgary, AB'
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following three lines for the Regina Leader-Post
|
||||
#title = u'Regina Leader-Post'
|
||||
#url_prefix = 'http://www.leaderpost.com'
|
||||
#description = u'News from Regina, SK'
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
#title = u'Saskatoon Star-Phoenix'
|
||||
#url_prefix = 'http://www.thestarphoenix.com'
|
||||
#description = u'News from Saskatoon, SK'
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -68,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self,soup):
    """Remove empty ``id`` attributes from ``div`` tags and return the soup.

    :param soup: parsed document exposing ``findAll``.
    :return: the same *soup* object, modified in place.
    """
    # Delete empty id attributes -- they break the TOC for unknown reasons.
    for div in soup.findAll('div',attrs={'id':''}):
        del div['id']
    # The original fell off the end and returned None instead of the soup.
    return soup
|
||||
def get_cover_url(self):
    """Return the URL of the newest available front-page image, or None.

    The image URL is built from the day of the month and ``self.fp_tag``.
    Today's URL is tried first, then each of the six previous days; the
    first URL that opens without error is returned.  None is returned when
    ``self.fp_tag`` is empty or when none of the seven URLs can be opened.
    """
    from datetime import timedelta, date
    if self.fp_tag=='':
        return None
    for daysback in range(7):
        day = (date.today() - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # This day's image could not be opened; try the day before.
            # (Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt and SystemExit.)
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self,string):
    """Return *string* with six control-range punctuation code points fixed.

    Code points 0x91-0x94, 0x96 and 0x97 are replaced by the typographic
    left/right single quote, left/right double quote, en dash and em dash.

    :param string: text to clean.
    :return: the cleaned text.
    """
    # Plain literal replacement: none of the patterns needs a regex.
    replacements = (
        ("\x91", "‘"),  # lsquo
        ("\x92", "’"),  # rsquo
        ("\x93", "“"),  # ldquo
        ("\x94", "”"),  # rdquo
        ("\x96", "–"),  # ndash
        ("\x97", "—"),  # mdash
    )
    fixed = string
    for bad, good in replacements:
        fixed = fixed.replace(bad, good)
    # NOTE(review): the original ended by substituting the right single
    # quote with itself, which does nothing as written.  Presumably an
    # escaped entity was meant as the pattern -- confirm before restoring.
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Clean *description* for the table of contents.

    A falsy *description* is returned unchanged.  Otherwise the text is run
    through ``BeautifulStoneSoup`` with HTML entity conversion, converted
    to unicode, and passed through ``self.fixChars``.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    decoded = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # NOTE(review): as written this substitutes '&' with '&' -- confirm the
    # intended replacement text.
    decoded = re.sub("&","&", decoded)
    return self.fixChars(decoded)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and fill in a missing article summary.

    When *first* is true and the body contains an ``img``, its ``src``
    (with any ``links\\linkN\\`` prefix removed) is registered through
    ``self.add_toc_thumbnail``.  When the article's ``text_summary`` is
    blank, the page's ``og:description`` meta content, if present, is
    stored as both ``summary`` and ``text_summary``.
    """
    if first:
        first_img = soup.find('body').find('img')
        if first_img is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',first_img['src'])
            self.add_toc_thumbnail(article,thumb_src)
    if article.text_summary.strip():
        # A non-blank summary already exists; keep it.
        return
    og_desc = soup.find('meta',attrs={'property':'og:description'})
    if og_desc is None:
        return
    article.summary = article.text_summary = og_desc['content']
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace every ``a`` tag that has no ``img`` with its rendered contents.

    The rendered contents are decoded as cp1252 (undecodable bytes are
    replaced).  Anchors that wrap an image are left alone.  Returns *soup*.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Return *soup* after passing it through ``self.strip_anchors``."""
    stripped = self.strip_anchors(soup)
    return stripped
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
@ -3,10 +3,17 @@ import re
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
''' there are three modifications:
|
||||
1) fetch issue cover
|
||||
2) toggle ignore premium articles
|
||||
3) extract proper section names, ie. "Comments", "Essay"
|
||||
|
||||
by Chen Wei weichen302@gmx.com, 2012-02-05'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
language = 'en'
|
||||
version = 1
|
||||
version = 1.01
|
||||
|
||||
title = u'Foreign Affairs (Subcription or (free) Registration)'
|
||||
publisher = u'Council on Foreign Relations'
|
||||
@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
|
||||
INDEX = 'http://www.foreignaffairs.com'
|
||||
FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
|
||||
INCLUDE_PREMIUM = False
|
||||
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name = 'base'))
|
||||
@ -37,6 +47,12 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
temp_files = []
|
||||
articles_are_obfuscated = True
|
||||
|
||||
def get_cover_url(self):
    """Return the absolute URL of the issue cover image, or None.

    The page at ``self.FRONTPAGE`` is searched for the
    ``inthemag-issuebuy-cover`` div and the ``src`` of the ``img`` inside
    it is appended to ``self.INDEX``.  None is returned when either the div
    or the image is absent, instead of raising on the missing element.
    """
    soup = self.index_to_soup(self.FRONTPAGE)
    div = soup.find('div', attrs={'class':'inthemag-issuebuy-cover'})
    img = div.find('img') if div is not None else None
    if img is None:
        # Cover markup not found on the front page: report "no cover".
        return None
    return self.INDEX + img['src']
|
||||
|
||||
def get_obfuscated_article(self, url):
|
||||
br = self.get_browser()
|
||||
br.open(url)
|
||||
@ -50,57 +66,46 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
||||
|
||||
return self.temp_files[-1].name
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('http://www.foreignaffairs.com/magazine')
|
||||
articles = []
|
||||
answer = []
|
||||
content = soup.find('div', attrs = {'class': 'center-wrapper'})
|
||||
if content:
|
||||
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
|
||||
tag = div.find('div', attrs = {'class': 'views-field-title'})
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
|
||||
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
|
||||
tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
# If they ever fix their markup, this will break :-(
|
||||
summary = self.tag_to_string(tag.findNextSibling('p'))
|
||||
description = author + '<br/>' + summary
|
||||
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
answer.append(('Magazine', articles))
|
||||
|
||||
ul = content.find('ul')
|
||||
if ul:
|
||||
soup = self.index_to_soup(self.FRONTPAGE)
|
||||
sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
|
||||
for sec in sec_start:
|
||||
content = sec.nextSibling
|
||||
if content:
|
||||
section = self.tag_to_string(content.find('h2'))
|
||||
articles = []
|
||||
for li in ul.findAll('li'):
|
||||
tag = li.find('div', attrs = {'class': 'views-field-title'})
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
description = ''
|
||||
tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
|
||||
if tag:
|
||||
description = self.tag_to_string(tag)
|
||||
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': description})
|
||||
else:
|
||||
continue
|
||||
tags = []
|
||||
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
|
||||
tags.append(div)
|
||||
for li in content.findAll('li'):
|
||||
tags.append(li)
|
||||
|
||||
for div in tags:
|
||||
title = url = description = author = None
|
||||
|
||||
if self.INCLUDE_PREMIUM:
|
||||
found_premium = False
|
||||
else:
|
||||
continue
|
||||
|
||||
answer.append(('Letters to the Editor', articles))
|
||||
found_premium = div.findAll('span', attrs={'class':
|
||||
'premium-icon'})
|
||||
if not found_premium:
|
||||
tag = div.find('div', attrs={'class': 'views-field-title'})
|
||||
|
||||
if tag:
|
||||
a = tag.find('a')
|
||||
if a:
|
||||
title = self.tag_to_string(a)
|
||||
url = self.INDEX + a['href']
|
||||
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
|
||||
tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
|
||||
description = self.tag_to_string(tag_summary)
|
||||
articles.append({'title':title, 'date':None, 'url':url,
|
||||
'description':description, 'author':author})
|
||||
if articles:
|
||||
answer.append((section, articles))
|
||||
return answer
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -8,21 +8,27 @@ Fetch High Country News
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class HighCountryNews(BasicNewsRecipe):
|
||||
|
||||
title = u'High Country News'
|
||||
description = u'News from the American West'
|
||||
__author__ = 'Armin Geller' # 2012-01-28
|
||||
publisher = 'High Country News'
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
language = 'en'
|
||||
encoding = 'UTF-8'
|
||||
title = u'High Country News'
|
||||
description = u'News from the American West'
|
||||
__author__ = 'Armin Geller' # 2012-01-31
|
||||
publisher = 'High Country News'
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
language = 'en-Us'
|
||||
encoding = 'UTF-8'
|
||||
publication_type = 'newspaper'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
masthead_url = 'http://www.hcn.org/logo.jpg' # 2012-01-31 AGe add
|
||||
cover_source = 'http://www.hcn.org' # 2012-01-31 AGe add
|
||||
|
||||
def get_cover_url(self):  # 2012-01-31 AGe add
    """Return the ``src`` of the image used as the cover.

    The page at ``self.cover_source`` is parsed, the element carrying the
    homepage class string is located, and the ``src`` of the ``img`` inside
    its first ``div`` is returned.
    """
    homepage = self.index_to_soup(self.cover_source)
    wrapper = homepage.find(attrs={'class':' portaltype-Plone Site content--hcn template-homepage_view'})
    cover_img = wrapper.div.img
    return cover_img['src']
|
||||
|
||||
feeds = [
|
||||
(u'Most recent', u'http://feeds.feedburner.com/hcn/most-recent'),
|
||||
|
BIN
recipes/icons/asianreviewofbooks.png
Normal file
BIN
recipes/icons/asianreviewofbooks.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 906 B |
Before Width: | Height: | Size: 712 B After Width: | Height: | Size: 712 B |
Binary file not shown.
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 289 B |
BIN
recipes/icons/samanyolu_haber.png
Normal file
BIN
recipes/icons/samanyolu_haber.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 968 B |
110
recipes/ilmanifesto.recipe
Normal file
110
recipes/ilmanifesto.recipe
Normal file
@ -0,0 +1,110 @@
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
|
||||
|
||||
class IlManifesto(BasicNewsRecipe):
    """Recipe for the latest HTML edition of Il Manifesto.

    The URL of the latest edition and the string identifying it are found
    once by ``_set_manifesto_index`` and cached on the instance; both the
    cover URL and the section/article index are derived from them.
    """

    title = 'Il Manifesto'
    __author__ = 'Giacomo Lacava'
    description = 'quotidiano comunista - ultima edizione html disponibile'
    publication_type = 'newspaper'
    publisher = 'il manifesto coop. editrice a r.l.'
    language = 'it'

    oldest_article = 2
    max_articles_per_feed = 100
    delay = 1
    no_stylesheets = True
    simultaneous_downloads = 5
    timeout = 30
    auto_cleanup = True
    remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
    remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
    remove_tags_after = dict(id='myPrintArea')

    # Filled in lazily by _set_manifesto_index().
    manifesto_index = None
    manifesto_datestr = None

    def _set_manifesto_index(self):
        """Find the latest edition once and cache its URL and edition string."""
        if self.manifesto_index is not None:
            # Already resolved by an earlier call.
            return
        startUrl = MANIFESTO_BASEURL + 'area-abbonati/in-edicola/'
        startSoup = self.index_to_soup(startUrl)
        # The second 'accordion_inedicola' div holds the link that is used.
        lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
        del(startSoup)
        self.manifesto_index = MANIFESTO_BASEURL + lastEdition
        urlsplit = lastEdition.split('/')
        # Use the last path component; if the href ends with '/', that
        # component is empty, so fall back to the one before it.
        self.manifesto_datestr = urlsplit[-1]
        if urlsplit[-1] == '':
            self.manifesto_datestr = urlsplit[-2]

    def get_cover_url(self):
        """Return the front-page image URL built from the cached edition string."""
        self._set_manifesto_index()
        url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr
        return url

    def parse_index(self):
        """Return a list of ``(section name, articles)`` tuples for the edition.

        Each link in the edition's 'accordion_inedicola' div is treated as a
        section; its page is fetched and every 'strumenti1_inedicola' div
        containing a link becomes one article dict.
        """
        self._set_manifesto_index()
        soup = self.index_to_soup(self.manifesto_index)
        feedLinks = soup.find('div',id='accordion_inedicola').findAll('a')
        result = []
        for feed in feedLinks:
            articles = []
            feedName = feed.find('h2').string
            feedUrl = MANIFESTO_BASEURL + feed['href']
            feedSoup = self.index_to_soup(feedUrl)
            indexRoot = feedSoup.find('div',attrs={'class':'column1'})
            for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
                artLink = div.find('a')
                if artLink is None:
                    continue  # empty div
                title = artLink.string
                url = MANIFESTO_BASEURL + artLink['href']

                description = ''
                descNode = div.find('div',attrs={'class':'text_12'})
                if descNode is not None:
                    description = descNode.string

                author = ''
                authNode = div.find('div',attrs={'class':'firma'})
                if authNode is not None:
                    author = authNode.string

                articleText = ''
                article = {
                        'title':title,
                        'url':url,
                        'date': strftime('%d %B %Y'),
                        'description': description,
                        'content': articleText,
                        'author': author
                }
                articles.append(article)
            result.append((feedName,articles))
        return result

    def extract_readable_article(self, html, url):
        """Return a minimal HTML document rebuilt from the article page.

        The title, subtitle, author, summary and body text are pulled out of
        *html* and substituted into a fixed template.  *url* is not used.
        """
        bs = BeautifulSoup(html)
        col1 = bs.find('div',attrs={'class':'column1'})

        content = col1.find('div',attrs={'class':'bodytext'})
        title = bs.find(id='titolo_articolo').string
        author = col1.find('span',attrs={'class':'firma'})
        subtitle = ''
        subNode = col1.findPrevious('div',attrs={'class':'occhiello_rosso'})
        if subNode is not None:
            subtitle = subNode
        summary = ''
        sommNode = bs.find('div',attrs={'class':'sommario'})
        if sommNode is not None:
            summary = sommNode

        template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"
        del(bs)
        return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content)
|
||||
|
||||
|
@ -13,9 +13,10 @@ class Kurier(BasicNewsRecipe):
|
||||
publisher = 'KURIER'
|
||||
category = 'news, politics, Austria'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
max_articles_per_feed = 100
|
||||
timeout = 30
|
||||
encoding = None
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'de_AT'
|
||||
remove_empty_feeds = True
|
||||
@ -29,9 +30,11 @@ class Kurier(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [dict(attrs={'class':['functionsleiste','functions','social_positionierung','contenttabs','drucken','versenden','leserbrief','kommentieren','addthis_button']})]
|
||||
remove_tags = [ dict(attrs={'id':['artikel_expand_symbol2','imgzoom_close2']}),
|
||||
dict(attrs={'class':['linkextern','functionsleiste','functions','social_positionierung','contenttabs','drucken','versenden','leserbrief','kommentieren','addthis_button']})
|
||||
]
|
||||
keep_only_tags = [dict(attrs={'id':'content'})]
|
||||
remove_tags_after = dict(attrs={'id':'author'})
|
||||
remove_tags_after = [dict(attrs={'id':'author'})]
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
@ -41,7 +44,7 @@ class Kurier(BasicNewsRecipe):
|
||||
,(u'Kultur' , u'http://kurier.at/rss/kultur_kultur_rss.xml' )
|
||||
,(u'Freizeit' , u'http://kurier.at/rss/freizeit_freizeit_rss.xml' )
|
||||
,(u'Wetter' , u'http://kurier.at/rss/oewetter_rss.xml' )
|
||||
,(u'Verkehr' , u'http://kurier.at/rss/verkehr_rss.xml' )
|
||||
,(u'Sport' , u'http://kurier.at/newsfeed/detail/sport_rss.xml' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
@ -1,8 +1,8 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__author__ = 'Lorenzo Vigentini and Olivier Daigle'
|
||||
__copyright__ = '2012, Lorenzo Vigentini <l.vigentini at gmail.com>, Olivier Daigle <odaigle _at nuvucameras __dot__ com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '14, January 2010'
|
||||
__date__ = '12, February 2012'
|
||||
__description__ = 'Canadian Paper '
|
||||
|
||||
'''
|
||||
@ -18,7 +18,7 @@ class ledevoir(BasicNewsRecipe):
|
||||
description = 'Canadian Paper. A subscription is optional, with it you get more content'
|
||||
|
||||
cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
|
||||
title = u'Le Devoir'
|
||||
title = u'Le Devoir '
|
||||
publisher = 'leDevoir.com'
|
||||
category = 'News, finance, economy, politics'
|
||||
|
||||
@ -26,11 +26,15 @@ class ledevoir(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
max_articles_per_feed = 50
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 200
|
||||
use_embedded_content = False
|
||||
recursion = 10
|
||||
needs_subscription = 'optional'
|
||||
|
||||
filterDuplicates = False
|
||||
url_list = []
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
@ -38,7 +42,7 @@ class ledevoir(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'article'}),
|
||||
dict(name='ul', attrs={'id':'ariane'})
|
||||
dict(name='div', attrs={'id':'colonne_principale'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
@ -51,7 +55,7 @@ class ledevoir(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
(u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
|
||||
(u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
|
||||
(u'Édition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
|
||||
(u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
|
||||
(u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
|
||||
(u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
|
||||
@ -61,7 +65,7 @@ class ledevoir(BasicNewsRecipe):
|
||||
(u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
|
||||
(u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
|
||||
(u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
|
||||
(u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
|
||||
(u'Art de vivre', 'http://www.ledevoir.com/rss/section/art-de-vivre.xml?id=50')
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
@ -85,8 +89,16 @@ class ledevoir(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.ledevoir.com')
|
||||
br.select_form(nr=1)
|
||||
br['login[courriel]'] = self.username
|
||||
br['login[password]'] = self.password
|
||||
br.select_form(nr=0)
|
||||
br['login_popup[courriel]'] = self.username
|
||||
br['login_popup[password]'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def print_version(self, url):
    """Return *url*, or None for a repeat when duplicate filtering is on.

    With ``self.filterDuplicates`` false the URL is returned untouched.
    Otherwise a URL already in ``self.url_list`` yields None, and a new URL
    is recorded in that list and returned.
    """
    if not self.filterDuplicates:
        return url
    if url in self.url_list:
        return
    self.url_list.append(url)
    return url
|
||||
|
||||
|
@ -1,41 +1,26 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.livemint.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LiveMint(BasicNewsRecipe):
|
||||
title = u'Livemint'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'The Wall Street Journal'
|
||||
publisher = 'The Wall Street Journal'
|
||||
category = 'news, games, adventure, technology'
|
||||
language = 'en'
|
||||
title = u'Live Mint'
|
||||
language = 'en_IN'
|
||||
__author__ = 'Krittika Goyal'
|
||||
#encoding = 'cp1252'
|
||||
oldest_article = 1 #days
|
||||
max_articles_per_feed = 25
|
||||
use_embedded_content = True
|
||||
|
||||
oldest_article = 15
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
extra_css = ' #dvArtheadline{font-size: x-large} #dvArtAbstract{font-size: large} '
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'innercontent'})]
|
||||
|
||||
remove_tags = [dict(name=['object','link','embed','form','iframe'])]
|
||||
feeds = [
|
||||
('Latest News',
|
||||
'http://www.livemint.com/StoryRss.aspx?LN=Latestnews'),
|
||||
('Gallery',
|
||||
'http://www.livemint.com/GalleryRssfeed.aspx'),
|
||||
('Top Stories',
|
||||
'http://www.livemint.com/StoryRss.aspx?ts=Topstories'),
|
||||
('Banking',
|
||||
'http://www.livemint.com/StoryRss.aspx?Id=104'),
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://www.livemint.com/SectionRssfeed.aspx?Mid=1')]
|
||||
|
||||
def print_version(self, url):
    """Return the print URL advertised on the article page, else *url*.

    The article page is fetched and searched for the element with the
    print-link id.  When one is found, the part of its ``href`` after the
    last '/Articles/' is appended to the fixed Articles base URL.
    """
    page = self.index_to_soup(url)
    print_link = page.find(attrs={'id':'ctl00_bodyplaceholdercontent_cntlArtTool_printUrl'})
    if not print_link:
        return url
    tail = print_link['href'].rpartition('/Articles/')[2]
    return 'http://www.livemint.com/Articles/' + tail
|
||||
|
||||
def preprocess_html(self, soup):
    """Return *soup* after passing it through ``self.adeify_images``."""
    adeified = self.adeify_images(soup)
    return adeified
|
||||
|
25
recipes/living_stones.recipe
Normal file
25
recipes/living_stones.recipe
Normal file
@ -0,0 +1,25 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LivingStonesPastorsBlog(BasicNewsRecipe):
    """Recipe that downloads the Living Stones pastors' blog from its RSS feed."""

    title = u'Living Stones Pastors Blog'
    __author__ = 'Peter Grungi'
    # Declared once; the original assigned the same value a second time below.
    language = 'en'

    oldest_article = 90
    max_articles_per_feed = 10
    auto_cleanup = True
    cover_url = 'http://blogs.livingstonesreno.com/wp-content/uploads/2011/08/blogBGRD_norepeat.jpg'
    masthead_url = 'http://www.livingstonesreno.com/podcast/LSpodcastnew.jpg'
    publisher = 'Living Stones Church of Reno, NV'
    author = 'Living Stones Church of Reno, NV'

    feeds = [(u'LS Blog', u'http://blogs.livingstonesreno.com/feed?utm_source=calibre&utm_medium=rss')]

    def full_version(self, url):
        """Return *url* with everything from the first '?' onward removed."""
        # NOTE(review): nothing in this class calls full_version -- confirm
        # it is invoked by the framework or was meant to be another hook.
        import re
        newurl = re.sub(r'\?.*','',url)
        return newurl
|
@ -38,18 +38,23 @@ except:
|
||||
removed keep_only tags
|
||||
Version 1.8 26-11-2022
|
||||
added remove tag: article-slideshow
|
||||
Version 1.9 31-1-2012
|
||||
removed some left debug settings
|
||||
extended timeout from 2 to 10
|
||||
changed oldest article from 10 to 1.2
|
||||
changed max articles from 15 to 25
|
||||
'''
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Metro Nieuws NL'
|
||||
oldest_article = 10
|
||||
max_articles_per_feed = 15
|
||||
oldest_article = 1.2
|
||||
max_articles_per_feed = 25
|
||||
__author__ = u'DrMerry'
|
||||
description = u'Metro Nederland'
|
||||
language = u'nl'
|
||||
simultaneous_downloads = 5
|
||||
simultaneous_downloads = 3
|
||||
masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
|
||||
timeout = 2
|
||||
timeout = 10
|
||||
center_navbar = True
|
||||
timefmt = ' [%A, %d %b %Y]'
|
||||
no_stylesheets = True
|
||||
|
217
recipes/microwave_and_rf.recipe
Normal file
217
recipes/microwave_and_rf.recipe
Normal file
@ -0,0 +1,217 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Title: Microwave and RF
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
|
||||
# Feb 2012: Initial release
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
'''
|
||||
mwrf.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
class Microwave_and_RF(BasicNewsRecipe):
|
||||
|
||||
Convert_Grayscale = False # Convert images to gray scale or not
|
||||
|
||||
# Add the names of the sections to exclude from the magazine
|
||||
exclude_sections = []
|
||||
|
||||
# Add the names of the sections to include in the magazine (leave empty to include all)
|
||||
include_sections = []
|
||||
|
||||
title = u'Microwave and RF'
|
||||
__author__ = 'kiavash'
|
||||
description = u'Microwave and RF Montly Magazine'
|
||||
publisher = 'Penton Media, Inc.'
|
||||
publication_type = 'magazine'
|
||||
site = 'http://mwrf.com'
|
||||
|
||||
language = 'en'
|
||||
asciiize = True
|
||||
timeout = 120
|
||||
simultaneous_downloads = 1 # very peaky site!
|
||||
|
||||
# Main article is inside this tag
|
||||
keep_only_tags = [dict(name='table', attrs={'id':'prtContent'})]
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
# Flattens all the tables to make it compatible with Nook
|
||||
conversion_options = {'linearize_tables' : True}
|
||||
|
||||
remove_tags = [
|
||||
dict(name='span', attrs={'class':'body12'}),
|
||||
]
|
||||
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
||||
.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
|
||||
font-size: 80%; font-style: italic; margin: 1px auto; } \
|
||||
.story-date, .published { font-size: 80%; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { font-size: 175%; font-weight: bold; } \
|
||||
h2 { font-size: 150%; font-weight: bold; } \
|
||||
h3 { font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
||||
|
||||
# Remove the line breaks and float left/right and picture width/height.
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'float:.*?'), lambda m: ''),
|
||||
(re.compile(r'width:.*?px'), lambda m: ''),
|
||||
(re.compile(r'height:.*?px'), lambda m: '')
|
||||
]
|
||||
|
||||
|
||||
def print_version(self, url):
    """Return the printer-friendly URL for an article URL.

    The ``.html`` suffix is removed and the ``/ArticleID/<id>/`` path
    segment is rewritten to ``/Print.cfm?ArticleID=``, so that
    ``.../ArticleID/123/123.html`` becomes ``.../Print.cfm?ArticleID=123``.
    """
    # The dot is escaped so that only a literal ".html" is removed; an
    # unescaped "." would also strip e.g. "/html" from ".../html5/...".
    url = re.sub(r'\.html', '', url)
    url = re.sub('/ArticleID/.*?/', '/Print.cfm?ArticleID=', url)
    return url
|
||||
|
||||
# Need to change the user agent to avoid potential download errors
|
||||
def get_browser(self, *args, **kwargs):
    """Return a calibre browser that uses a Firefox 10 user-agent string.

    Any ``user_agent`` passed by the caller is overridden; every other
    positional and keyword argument is forwarded to ``calibre.browser``.
    """
    from calibre import browser
    options = dict(kwargs)
    options['user_agent'] = 'Mozilla/5.0 (Windows NT 5.1; rv:10.0) Gecko/20100101 Firefox/10.0'
    return browser(*args, **options)
|
||||
|
||||
|
||||
def parse_index(self):
    """Build the article index for the latest issue.

    Fetches ``self.site``, follows the first link whose href contains
    ``IssueID`` and reads the issue date, the cover image and the table of
    contents from the ``columnContainer`` div of that page.  Sections named
    in ``exclude_sections`` are dropped; when ``include_sections`` is not
    empty only the sections named there are kept.

    Side effects: sets ``self.timefmt`` and, when a cover image is found,
    ``self.cover_url``.

    Returns a list holding at most one ``(self.title, articles)`` tuple;
    each article is a dict with ``title``, ``url``, ``description`` and
    ``date`` keys.

    Raises ValueError when the issue link or the issue container cannot be
    found.
    """
    # Fetches the main page of Microwave and RF
    soup = self.index_to_soup(self.site)

    # Searches the site for Issue ID link then takes the href address
    # pointing to the latest issue
    issue_link = soup.find('a', attrs={'href':lambda x: x and 'IssueID' in x})
    if issue_link is None:
        raise ValueError('No link to the latest issue found on ' + self.site)
    latest_issue = issue_link.get('href')
    # Site-relative links are made absolute, as is done for article links below
    if latest_issue.startswith('/'):
        latest_issue = self.site + latest_issue

    # Fetches the index page of the latest issue
    soup = self.index_to_soup(latest_issue)

    # Finds the main section of the page containing cover, issue date and
    # TOC
    ts = soup.find('div', attrs={'id':'columnContainer'})
    if ts is None:
        raise ValueError('No issue contents found at ' + latest_issue)

    # Finds the issue date (last two words of the section head)
    ds = ' '.join(self.tag_to_string(ts.find('span', attrs={'class':'CurrentIssueSectionHead'})).strip().split()[-2:]).capitalize()
    self.log('Found Current Issue:', ds)
    self.timefmt = ' [%s]'%ds

    # Finds the cover image
    cover = ts.find('img', src = lambda x: x and 'Cover' in x)
    if cover is not None:
        self.cover_url = self.site + cover['src']
        self.log('Found Cover image:', self.cover_url)

    feeds = []
    article_info = []

    # Finds all the articles (titles and links)
    articles = ts.findAll('a', attrs={'class':'commonArticleTitle'})

    # Finds all the descriptions
    descriptions = ts.findAll('span', attrs={'class':'commonCopy'})

    # Find all the sections
    sections = ts.findAll('span', attrs={'class':'kicker'})

    # Goes thru all the articles one by one and sorts them out.
    # NOTE(review): as in the original, the k-th section (counting from 1)
    # is paired with articles[k] and descriptions[k], so index 0 of both
    # lists is never emitted -- presumably a leading entry without a section
    # kicker; confirm against the page markup.
    for title_number, section in enumerate(sections, 1):
        section_name = self.tag_to_string(section)

        # Removes the unwanted sections
        if section_name in self.exclude_sections:
            continue

        # Only includes the wanted sections
        if self.include_sections and section_name not in self.include_sections:
            continue

        # Stop cleanly instead of failing the whole download when the page
        # lists fewer articles than section kickers.
        if title_number >= len(articles):
            self.log('No article found for section', section_name, '- stopping')
            break

        title = self.tag_to_string(articles[title_number])
        url = articles[title_number].get('href')
        if url.startswith('/'):
            url = self.site + url

        self.log('\tFound article:', title, 'at', url)
        if title_number < len(descriptions):
            desc = self.tag_to_string(descriptions[title_number])
        else:
            desc = ''
        self.log('\t\t', desc)

        article_info.append({'title':title, 'url':url, 'description':desc,
                             'date':self.timefmt})

    if article_info:
        feeds.append((self.title, article_info))

    return feeds
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Optionally convert every image of the page to grayscale.

    Nothing is done unless ``self.Convert_Grayscale`` is true.  Otherwise
    each ``<img>`` that has a ``src`` attribute is opened from that
    location, switched to ``GrayscaleType`` and saved back to the same
    location.

    Returns *soup*.
    """
    if self.Convert_Grayscale:
        # process all the images
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            # The former "img < 0" out-of-memory test compared an Image
            # object with an int, which can never detect a failed load
            # (and is a TypeError on Python 3), so it has been dropped.
            img.type = "GrayscaleType"
            img.save(iurl)
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Turn figure links into inline images and strip the remaining links.

    In order:
      1. every ``<a>`` whose href contains ``jpg`` becomes an ``<img>``
         with that (absolute) address as ``src`` and ``display:block``;
      2. ``<a class="commonSectionTitle">`` becomes ``<h1>``;
      3. ``<a class="kicker2">`` becomes ``<h5>``;
      4. every ``<a>`` that still has an href becomes ``<font>``.
    ``href`` and ``target`` are removed from every converted tag.

    Returns *soup*.
    """
    # Includes all the figures inside the final ebook
    for anchor in soup.findAll('a', attrs = {'href' : lambda x: x and 'jpg' in x}):
        address = anchor['href']
        # makes sure that the link points to the absolute web address
        if address.startswith('/'):
            address = self.site + address
        anchor.name = 'img'
        anchor['src'] = address
        anchor['style'] = 'display:block'
        del anchor['href']
        del anchor['target']

    # Makes the title stand out, then makes the section name more visible
    for css_class, heading in (('commonSectionTitle', 'h1'), ('kicker2', 'h5')):
        for anchor in soup.findAll('a', attrs = {'class': css_class}):
            anchor.name = heading
            del anchor['href']
            del anchor['target']

    # Removes all unrelated links
    for anchor in soup.findAll('a', attrs = {'href': True}):
        anchor.name = 'font'
        del anchor['href']
        del anchor['target']

    return soup
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,15 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
title = u'Montreal Gazette'
|
||||
url_prefix = 'http://www.montrealgazette.com'
|
||||
description = u'News from Montreal, QC'
|
||||
fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -38,14 +96,81 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
#delete empty id attributes--they screw up the TOC for unknown reasons
|
||||
divtags = soup.findAll('div',attrs={'id':''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del(div['id'])
|
||||
|
||||
def get_cover_url(self):
    """Return the URL of the most recent available front-page image.

    The image address is built from the day of the month and
    ``self.fp_tag``.  Today's image is tried first, then each of the six
    previous days; the first address that can be opened is returned.

    Returns None when ``fp_tag`` is empty or when none of the seven
    addresses can be opened (a message is logged in the latter case).
    """
    from datetime import timedelta, date
    if self.fp_tag=='':
        return None
    # Read the date once so every attempt is relative to the same day
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Not published (or unreachable) for that day: try the day before
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self,string):
    """Return *string* with Windows-1252 punctuation code points replaced.

    The code points \\x91-\\x94, \\x96 and \\x97 are replaced by the
    corresponding typographic quotes and dashes, in the order listed below.
    """
    substitutions = (
        ("\x91","‘"),  # lsquo
        ("\x92","’"),  # rsquo
        ("\x93","“"),  # ldquo
        ("\x94","”"),  # rdquo
        ("\x96","–"),  # ndash
        ("\x97","—"),  # mdash
        # NOTE(review): pattern and replacement are the same character as
        # shown here, so this pass changes nothing -- confirm the intended
        # pattern against the upstream recipe.
        ("’","’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Return *description* cleaned up for use in the Kindle TOC.

    A falsy *description* is returned unchanged.  Otherwise HTML entities
    are converted via BeautifulStoneSoup, the ampersand substitution below
    is applied, and the result is passed through ``fixChars``.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    decoded = BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    # NOTE(review): pattern and replacement are identical as shown here, so
    # this substitution changes nothing -- confirm against upstream.
    text = re.sub("&","&", unicode(decoded))
    return self.fixChars(text)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first:
|
||||
picdiv = soup.find('body').find('img')
|
||||
if picdiv is not None:
|
||||
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
|
||||
xtitle = article.text_summary.strip()
|
||||
if len(xtitle) == 0:
|
||||
desc = soup.find('meta',attrs={'property':'og:description'})
|
||||
if desc is not None:
|
||||
article.summary = article.text_summary = desc['content']
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace links that hold no image with their rendered contents.

    Every tag of *soup* is visited; each ``<a>`` found beneath it whose
    ``img`` lookup is None is replaced by its own rendered contents,
    decoded as cp1252 with undecodable bytes replaced.  Links that contain
    an image are left alone.

    Returns *soup*.
    """
    for container in soup.findAll(True):
        for anchor in container.findAll('a'):
            if anchor.img is not None:
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.strip_anchors(soup)
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
@ -1,58 +1,53 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Title: Microwave Journal RSS recipe
|
||||
## Title: Microwave Journal
|
||||
## Contact: Kiavash (use Mobile Read)
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
## Copyright: Kiavash
|
||||
##
|
||||
## Written: Jan 2012
|
||||
## Last Edited: Jan 2012
|
||||
## Last Edited: Feb 2012
|
||||
##
|
||||
|
||||
# Feb 2012: New Recipe compatible with the MWJournal 2.0 website
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
__copyright__ = 'Kiavash'
|
||||
__author__ = 'Kaivash'
|
||||
|
||||
'''
|
||||
Microwave Journal Monthly Magazine
|
||||
You need to sign up (free) and get username/password.
|
||||
microwavejournal.com
|
||||
'''
|
||||
|
||||
import re # Import the regular expressions module.
|
||||
from calibre.ptempfile import TemporaryFile # we need this for saving to a temp file
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
class MWJournal(BasicNewsRecipe):
|
||||
# Title to use for the ebook.
|
||||
title = u'Microwave Journal'
|
||||
__author__ = 'Kiavash'
|
||||
language = 'en'
|
||||
|
||||
#A brief description for the ebook.
|
||||
description = u'Microwave Journal web site ebook created using rss feeds.'
|
||||
|
||||
# Set publisher and publication type.
|
||||
publisher = 'Horizon House'
|
||||
title = u'Microwave Journal'
|
||||
description = u'Microwave Journal Monthly Magazine'
|
||||
publisher = 'Horizon House'
|
||||
publication_type = 'magazine'
|
||||
INDEX = 'http://www.microwavejournal.com/publications/'
|
||||
|
||||
oldest_article = 31 # monthly published magazine. Some months are 31 days!
|
||||
max_articles_per_feed = 100
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
|
||||
# Disable stylesheets and javascript from site.
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
asciiize = True # Converts all none ascii characters to their ascii equivalents
|
||||
|
||||
needs_subscription = True # oh yeah... we need to login btw.
|
||||
|
||||
# Timeout for fetching files from the server in seconds. The default of 120 seconds, seems somewhat excessive.
|
||||
language = 'en'
|
||||
timeout = 30
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
Convert_Grayscale = False # Convert images to gray scale or not
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'record'})]
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_tags = [
|
||||
dict(name='font', attrs={'class':'footer'}), # remove fonts
|
||||
]
|
||||
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
@ -72,72 +67,75 @@ class MWJournal(BasicNewsRecipe):
|
||||
h3 { font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'boxadzonearea350'}), # Removes banner ads
|
||||
dict(name='font', attrs={'class':'footer'}), # remove fonts if you do like your fonts more! Comment out to use website's fonts
|
||||
dict(name='div', attrs={'class':'newsarticlead'})
|
||||
]
|
||||
|
||||
# Remove various tag attributes to improve the look of the ebook pages.
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Remove the line breaks as well as href links. Books don't have links generally speaking
|
||||
# Remove the line breaks, href links and float left/right and picture width/height.
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<a.*?>'), lambda h1: ''),
|
||||
(re.compile(r'</a>'), lambda h2: '')
|
||||
(re.compile(r'</a>'), lambda h2: ''),
|
||||
(re.compile(r'float:.*?'), lambda h3: ''),
|
||||
(re.compile(r'width:.*?px'), lambda h4: ''),
|
||||
(re.compile(r'height:.*?px'), lambda h5: '')
|
||||
]
|
||||
|
||||
# Select the feeds that you are interested.
|
||||
feeds = [
|
||||
(u'Current Issue', u'http://www.mwjournal.com/rss/Rss.asp?type=99'),
|
||||
(u'Industry News', u'http://www.mwjournal.com/rss/Rss.asp?type=1'),
|
||||
(u'Resources', u'http://www.mwjournal.com/rss/Rss.asp?type=3'),
|
||||
(u'Buyer\'s Guide', u'http://www.mwjournal.com/rss/Rss.asp?type=5'),
|
||||
(u'Events', u'http://www.mwjournal.com/rss/Rss.asp?type=2'),
|
||||
(u'All Updates', u'http://www.mwjournal.com/rss/Rss.asp?type=0'),
|
||||
]
|
||||
|
||||
# No magazine is complete without cover. Let's get it then!
|
||||
# The function is adapted from the Economist recipe
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
cover_page_location = 'http://www.mwjournal.com/Journal/' # Cover image is located on this page
|
||||
soup = self.index_to_soup(cover_page_location)
|
||||
cover_item = soup.find('img',attrs={'src':lambda x: x and '/IssueImg/3_MWJ_CurrIss_CoverImg' in x}) # There are three files named cover, we want the highest resolution which is the 3rd image. So we look for the pattern. Remember that the name of the cover image changes every month so we cannot search for the complete name. Instead we are searching for the pattern
|
||||
if cover_item:
|
||||
cover_url = 'http://www.mwjournal.com' + cover_item['src'].strip() # yeah! we found it. Let's fetch the image file and pass it as cover to calibre
|
||||
return cover_url
|
||||
|
||||
def print_version(self, url):
|
||||
if url.find('/Journal/article.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/Journal/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
|
||||
elif url.find('/News/article.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/News/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
|
||||
elif url.find('/Resources/TechLib.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/Resources/TechLib.asp?HH_ID=', '/Resources/PrintRessource.asp?Id=')
|
||||
return url.replace('/articles/', '/articles/print/')
|
||||
|
||||
def get_browser(self):
|
||||
'''
|
||||
Microwave Journal website, directs the login page to omeda.com once login info is submitted, omeda.com redirects to mwjournal.com with again the browser logs in into that site (hidden from the user). To overcome this obsticle, first login page is fetch and its output is stored to an HTML file. Then the HTML file is opened again and second login form is submitted (Many thanks to Barty which helped with second page login).
|
||||
'''
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
url = ('http://www.omeda.com/cgi-win/mwjreg.cgi?m=login') # main login page.
|
||||
br.open(url) # fetch the 1st login page
|
||||
br.select_form('login') # finds the login form
|
||||
br['EMAIL_ADDRESS'] = self.username # fills the username
|
||||
br['PASSWORD'] = self.password # fills the password
|
||||
raw = br.submit().read() # submit the form and read the 2nd login form
|
||||
# save it to an htm temp file (from ESPN recipe written by Kovid Goyal kovid@kovidgoyal.net
|
||||
with TemporaryFile(suffix='.htm') as fname:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(raw)
|
||||
br.open_local_file(fname)
|
||||
br.select_form(nr=0) # finds submit on the 2nd form
|
||||
didwelogin = br.submit().read() # submit it and read the return html
|
||||
if 'Welcome ' not in didwelogin: # did it login successfully? Is Username/password correct?
|
||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||
#login is done
|
||||
return br
|
||||
def parse_index(self):
    """Build the per-section article index of the current issue.

    Reads ``self.INDEX``: the issue name and cover come from the
    ``box1 article publications-show`` div, the sections from the
    ``records`` divs inside the ``box2 publication`` div.  Records titled
    'The MWJ Puzzler' and records whose title was already seen are skipped.

    Side effects: sets ``self.timefmt`` and, when a cover image is found,
    ``self.cover_url``.

    Returns a list of ``(section_title, articles)`` tuples; each article is
    a dict with ``title``, ``url``, ``description`` and ``date`` keys.

    Raises ValueError when the issue box or the section box is missing.
    """
    soup = self.index_to_soup(self.INDEX)
    ts = soup.find('div', attrs={'class':'box1 article publications-show'})
    if ts is None:
        raise ValueError('No current issue found at ' + self.INDEX)
    ds = self.tag_to_string(ts.find('h2'))
    self.log('Found Current Issue:', ds)
    self.timefmt = ' [%s]'%ds

    cover = ts.find('img', src=True)
    if cover is not None:
        self.cover_url = 'http://www.microwavejournal.com' + cover['src']
        self.log('Found Cover image:', self.cover_url)

    feeds = []
    seen_titles = set()  # This is used to remove duplicate articles
    sections = soup.find('div', attrs={'class':'box2 publication'})
    if sections is None:
        raise ValueError('No article sections found at ' + self.INDEX)
    for section in sections.findAll('div', attrs={'class':'records'}):
        section_title = self.tag_to_string(section.find('h3'))
        self.log('Found section:', section_title)
        articles = []
        for post in section.findAll('div', attrs={'class':'record'}):
            h = post.find('h2')
            title = self.tag_to_string(h)
            if title.find('The MWJ Puzzler') >=0:  # Let's get rid of the useless Puzzler!
                continue
            if title in seen_titles:
                continue
            a = post.find('a', href=True)
            if a is None:
                # A record without a link cannot be downloaded; skip it
                # instead of failing the whole index.
                continue
            seen_titles.add(title)
            url = a['href']
            if url.startswith('/'):
                url = 'http://www.microwavejournal.com'+url
            self.log('\tFound article:', title, 'at', url)
            # The abstract (and its paragraph) may be missing; the
            # description is then left as None.
            desc = None
            abstract = post.find('div', attrs={'class':'abstract'})
            p = abstract.find('p') if abstract is not None else None
            if p is not None:
                desc = self.tag_to_string(p)
                self.log('\t\t', desc)
            articles.append({'title':title, 'url':url, 'description':desc,
                             'date':self.timefmt})
        if articles:
            feeds.append((section_title, articles))
    return feeds
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Optionally convert every image of the page to grayscale.

    Nothing is done unless ``self.Convert_Grayscale`` is true.  Otherwise
    each ``<img>`` that has a ``src`` attribute is opened from that
    location, switched to ``GrayscaleType`` and saved back to the same
    location.

    Returns *soup*.
    """
    if self.Convert_Grayscale:
        # process all the images
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            # The former "img < 0" out-of-memory test compared an Image
            # object with an int, which can never detect a failed load
            # (and is a TypeError on Python 3), so it has been dropped.
            img.type = "GrayscaleType"
            img.save(iurl)
    return soup
|
||||
|
26
recipes/novinite_bg.recipe
Normal file
26
recipes/novinite_bg.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1329123365(BasicNewsRecipe):
    """Recipe for Novinite.bg, built from its per-category RSS feeds."""

    title = u'Novinite.bg'
    __author__ = 'M3 Web'
    description = 'Real time provider of the latest news from Bulgaria and the world'
    category = 'Business, Politics, Society, Sports, Crime, Lifestyle, World, Health'
    oldest_article = 7
    max_articles_per_feed = 6
    language = 'bg'
    encoding = 'windows-1251'
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [dict(name='div', attrs={'id':'content'})]
    # Both rules live in ONE list: remove_tags used to be assigned twice, so
    # the second assignment silently discarded the 'text_options' rule.
    remove_tags = [dict(name='div', attrs={'id':'text_options'}),
                   dict(name='div', attrs={'id':'social_shares_top'})]
    remove_tags_after = dict(id='textsize')
    feeds = [(u'Business', u'http://novinite.bg/rss.php?category_id=1'),
             (u'Politics', u'http://novinite.bg/rss.php?category_id=2'),
             (u'Society', u'http://novinite.bg/rss.php?category_id=3'),
             (u'Sport', u'http://novinite.bg/rss.php?category_id=4'),
             (u'Crime', u'http://novinite.bg/rss.php?category_id=5'),
             (u'Lifestyle', u'http://novinite.bg/rss.php?category_id=6'),
             (u'Health', u'http://novinite.bg/rss.php?category_id=7'),
             (u'Other', u'http://novinite.bg/rss.php?category_id=10'),
             (u'World', u'http://novinite.bg/rss.php?category_id=9')]
|
21
recipes/onda_rock.recipe
Normal file
21
recipes/onda_rock.recipe
Normal file
@ -0,0 +1,21 @@
|
||||
__license__ = 'GPL v3'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1328535130(BasicNewsRecipe):
    """Recipe for Onda Rock, an Italian rock webzine (single RSS feed)."""

    # Identification
    title = u'Onda Rock'
    description = 'Italian rock webzine'
    __author__ = 'faber1971'
    language = 'it'
    masthead_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/71135_45820579767_4993043_n.jpg'

    # Feed selection
    feeds = [(u'Onda Rock', u'http://www.ondarock.it/feed.php')]
    oldest_article = 7
    max_articles_per_feed = 100

    # Page clean-up: automatic clean-up is off, unwanted divs are listed by hand
    auto_cleanup = False
    no_stylesheets = True
    remove_tags = [
        dict(name='div', attrs={'id':['boxHeader','boxlinks_med','footer','boxinterviste','box_special_med','boxdiscografia_head','path']}),
        dict(name='div', attrs={'align':'left'}),
        dict(name='div', attrs={'style':'text-align: center'}),
    ]
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,20 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
title = u'Ottawa Citizen'
|
||||
url_prefix = 'http://www.ottawacitizen.com'
|
||||
description = u'News from Ottawa, ON'
|
||||
fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -43,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
#delete empty id attributes--they screw up the TOC for unknown reasons
|
||||
divtags = soup.findAll('div',attrs={'id':''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del(div['id'])
|
||||
def get_cover_url(self):
    """Return the URL of the most recent available front-page image.

    The image address is built from the day of the month and
    ``self.fp_tag``.  Today's image is tried first, then each of the six
    previous days; the first address that can be opened is returned.

    Returns None when ``fp_tag`` is empty or when none of the seven
    addresses can be opened (a message is logged in the latter case).
    """
    from datetime import timedelta, date
    if self.fp_tag=='':
        return None
    # Read the date once so every attempt is relative to the same day
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Not published (or unreachable) for that day: try the day before
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self,string):
    """Return ``string`` with cp1252-style punctuation code points replaced.

    Each (pattern, replacement) pair below is applied with ``re.sub`` in the
    order listed.
    """
    substitutions = (
        ("\x91","‘"),  # lsquo
        ("\x92","’"),  # rsquo
        ("\x93","“"),  # ldquo
        ("\x94","”"),  # rdquo
        ("\x96","–"),  # ndash
        ("\x97","—"),  # mdash
        # NOTE(review): pattern and replacement are identical as the code
        # stands, so this pass changes nothing -- confirm what was intended.
        ("’","’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Clean up a TOC description string; falsy input is returned unchanged.

    Kindle TOC descriptions won't render certain characters, so HTML entities
    are converted via BeautifulStoneSoup and the result is passed through
    ``fixChars``.
    """
    if not description:
        return description
    parsed = BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    massaged = unicode(parsed)
    # Replace '&' with '&'
    # NOTE(review): as written this substitution replaces '&' with itself --
    # confirm the intended replacement text.
    massaged = re.sub("&","&", massaged)
    return self.fixChars(massaged)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and a fallback summary to ``article``.

    When ``first`` is true, the first ``img`` inside ``body`` (if any) is
    registered as the TOC thumbnail, with any ``links\\linkN\\`` prefix
    removed from its ``src``.  When the article's text summary is blank, the
    page's ``og:description`` meta tag (if present) supplies the summary.
    """
    if first:
        first_image = soup.find('body').find('img')
        if first_image is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',first_image['src'])
            self.add_toc_thumbnail(article, thumb_src)
    if article.text_summary.strip():
        # A non-blank summary already exists; leave it alone.
        return
    og_desc = soup.find('meta',attrs={'property':'og:description'})
    if og_desc is not None:
        article.summary = article.text_summary = og_desc['content']
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace every image-less ``a`` tag found under any tag with its contents.

    Anchors that contain an ``img`` are left in place.  The contents are
    rendered and decoded as cp1252 (undecodable bytes replaced) before being
    substituted for the anchor.  Returns the same ``soup``.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Pre-process a downloaded page by running it through ``strip_anchors``."""
    cleaned = self.strip_anchors(soup)
    return cleaned
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
@ -1,18 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
pescanik.net
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class Pescanik(BasicNewsRecipe):
|
||||
title = 'Pescanik'
|
||||
title = 'Peščanik'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Pescanik'
|
||||
publisher = 'Pescanik'
|
||||
description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
|
||||
publisher = 'Peščanik'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 10
|
||||
max_articles_per_feed = 100
|
||||
@ -20,8 +20,13 @@ class Pescanik(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
language = 'sr'
|
||||
publication_type = 'newsportal'
|
||||
extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} .article_description,body{font-family: Arial,"Lucida Grande",Tahoma,Verdana,sans1,sans-serif} .contentheading{font-size: x-large; font-weight: bold} .small{font-size: small} .createdate{font-size: x-small; font-weight: bold} '
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'http://pescanik.net/wp-content/uploads/2011/10/logo1.png'
|
||||
extra_css = """
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Verdana,Arial,Tahoma,sans1,sans-serif}
|
||||
#BlogTitle{font-size: xx-large; font-weight: bold}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -32,29 +37,12 @@ class Pescanik(BasicNewsRecipe):
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
remove_attributes = ['valign','colspan','width','height','align','alt']
|
||||
|
||||
remove_tags = [dict(name=['object','link','meta','script'])]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'class':['contentheading','small','createdate']})
|
||||
,dict(name='td', attrs={'valign':'top','colspan':'2'})
|
||||
]
|
||||
|
||||
feeds = [(u'Pescanik Online', u'http://www.pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||
remove_tags = [dict(name=['object','link','meta','script','iframe','embed'])]
|
||||
keep_only_tags = [dict(attrs={'id':['BlogTitle','BlogDate','BlogContent']})]
|
||||
feeds = [
|
||||
(u'Autori' , u'http://pescanik.net/category/autori/feed/'),
|
||||
(u'Prevodi', u'http://pescanik.net/category/prevodi/feed/')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
nurl = url.replace('/index.php','/index2.php')
|
||||
return nurl + '&pop=1&page=0'
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
st = soup.findAll('td')
|
||||
for it in st:
|
||||
it.name='p'
|
||||
for pt in soup.findAll('img'):
|
||||
brtag = Tag(soup,'br')
|
||||
brtag2 = Tag(soup,'br')
|
||||
pt.append(brtag)
|
||||
pt.append(brtag2)
|
||||
return soup
|
||||
return url + 'print/'
|
@ -1,30 +1,36 @@
|
||||
"""
|
||||
readitlaterlist.com
|
||||
"""
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '''
|
||||
2010, Darko Miletic <darko.miletic at gmail.com>
|
||||
2011, Przemyslaw Kryger <pkryger at gmail.com>
|
||||
'''
|
||||
'''
|
||||
readitlaterlist.com
|
||||
2012, tBunnyMan <Wag That Tail At Me dot com>
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Readitlater(BasicNewsRecipe):
|
||||
title = 'Read It Later'
|
||||
__author__ = 'Darko Miletic, Przemyslaw Kryger'
|
||||
description = '''Personalized news feeds. Go to readitlaterlist.com to
|
||||
setup up your news. Fill in your account
|
||||
username, and optionally you can add password.'''
|
||||
publisher = 'readitlater.com'
|
||||
title = 'ReadItLater'
|
||||
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
|
||||
description = '''Personalized news feeds. Go to readitlaterlist.com to setup \
|
||||
up your news. This version displays pages of articles from \
|
||||
oldest to newest, with max & minimum counts, and marks articles \
|
||||
read after downloading.'''
|
||||
publisher = 'readitlaterlist.com'
|
||||
category = 'news, custom'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
max_articles_per_feed = 50
|
||||
minimum_articles = 1
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
needs_subscription = True
|
||||
INDEX = u'http://readitlaterlist.com'
|
||||
LOGIN = INDEX + u'/l'
|
||||
readList = []
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
@ -33,41 +39,46 @@ class Readitlater(BasicNewsRecipe):
|
||||
br.select_form(nr=0)
|
||||
br['feed_id'] = self.username
|
||||
if self.password is not None:
|
||||
br['password'] = self.password
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def get_feeds(self):
|
||||
self.report_progress(0, ('Fetching list of feeds...'))
|
||||
self.report_progress(0, ('Fetching list of pages...'))
|
||||
lfeeds = []
|
||||
i = 1
|
||||
feedurl = self.INDEX + u'/unread/1'
|
||||
while True:
|
||||
title = u'Unread articles, page ' + str(i)
|
||||
lfeeds.append((title, feedurl))
|
||||
self.report_progress(0, ('Got ') + str(i) + (' feeds'))
|
||||
lfeeds.insert(0, (title, feedurl))
|
||||
self.report_progress(0, ('Got ') + str(i) + (' pages'))
|
||||
i += 1
|
||||
soup = self.index_to_soup(feedurl)
|
||||
ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
|
||||
ritem = soup.find('a', attrs={'id':'next', 'class':'active'})
|
||||
if ritem is None:
|
||||
break
|
||||
feedurl = self.INDEX + ritem['href']
|
||||
if self.test:
|
||||
return lfeeds[:2]
|
||||
return lfeeds
|
||||
|
||||
def parse_index(self):
|
||||
totalfeeds = []
|
||||
articlesToGrab = self.max_articles_per_feed
|
||||
lfeeds = self.get_feeds()
|
||||
for feedobj in lfeeds:
|
||||
if articlesToGrab < 1:
|
||||
break
|
||||
feedtitle, feedurl = feedobj
|
||||
self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||
articles = []
|
||||
soup = self.index_to_soup(feedurl)
|
||||
ritem = soup.find('ul',attrs={'id':'list'})
|
||||
for item in ritem.findAll('li'):
|
||||
ritem = soup.find('ul', attrs={'id':'list'})
|
||||
for item in reversed(ritem.findAll('li')):
|
||||
if articlesToGrab < 1:
|
||||
break
|
||||
else:
|
||||
articlesToGrab -= 1
|
||||
description = ''
|
||||
atag = item.find('a',attrs={'class':'text'})
|
||||
atag = item.find('a', attrs={'class':'text'})
|
||||
if atag and atag.has_key('href'):
|
||||
url = self.INDEX + atag['href']
|
||||
title = self.tag_to_string(item.div)
|
||||
@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe):
|
||||
,'url' :url
|
||||
,'description':description
|
||||
})
|
||||
readLink = item.find('a', attrs={'class':'check'})['href']
|
||||
self.readList.append(readLink)
|
||||
totalfeeds.append((feedtitle, articles))
|
||||
if len(self.readList) < self.minimum_articles:
|
||||
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
|
||||
return totalfeeds
|
||||
|
||||
def mark_as_read(self, markList):
    """Open ``self.INDEX + link`` for every link in ``markList``.

    Uses the browser returned by ``self.get_browser()``; the responses are
    discarded.  Presumably each URL is the site's "mark as read" action for
    one article -- verify against the caller.
    """
    browser = self.get_browser()
    for link in markList:
        browser.open(self.INDEX + link)
|
||||
|
||||
def cleanup(self):
    """Pass every link collected in ``self.readList`` to ``mark_as_read``."""
    collected = self.readList
    self.mark_as_read(collected)
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,35 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Regina Leader-Post
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
title = u'Regina Leader-Post'
|
||||
url_prefix = 'http://www.leaderpost.com'
|
||||
description = u'News from Regina, SK'
|
||||
fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
#title = u'Saskatoon Star-Phoenix'
|
||||
#url_prefix = 'http://www.thestarphoenix.com'
|
||||
#description = u'News from Saskatoon, SK'
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -58,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self, soup):
    """Remove empty ``id`` attributes from ``div`` tags and return the soup.

    :param soup: parsed article document exposing ``findAll``.
    :return: the same ``soup`` object, modified in place.

    NOTE(review): this recipe class appears to define ``preprocess_html`` a
    second time further down (delegating to ``strip_anchors``); if so, that
    later definition replaces this one -- confirm which is intended.
    """
    # Delete empty id attributes -- they screw up the TOC for unknown reasons.
    for div in soup.findAll('div', attrs={'id': ''}):
        del div['id']
    # The hook must hand the document back; falling off the end returned None.
    return soup
|
||||
def get_cover_url(self):
    """Return the URL of the newest reachable front-page image, or None.

    The image URL on webmedia.newseum.org is built from the day of the month
    and ``self.fp_tag``.  Today's image is tried first, then each of the six
    previous days.  Returns None straight away when ``self.fp_tag`` is empty,
    and logs "Cover unavailable" and returns None when no attempt succeeds.
    """
    from datetime import timedelta, date
    if self.fp_tag == '':
        return None
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Narrower than a bare except: lets KeyboardInterrupt/SystemExit
            # propagate while still treating any fetch failure as "try the
            # previous day".
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self,string):
    """Return ``string`` with cp1252-style punctuation code points replaced.

    Each (pattern, replacement) pair below is applied with ``re.sub`` in the
    order listed.
    """
    substitutions = (
        ("\x91","‘"),  # lsquo
        ("\x92","’"),  # rsquo
        ("\x93","“"),  # ldquo
        ("\x94","”"),  # rdquo
        ("\x96","–"),  # ndash
        ("\x97","—"),  # mdash
        # NOTE(review): pattern and replacement are identical as the code
        # stands, so this pass changes nothing -- confirm what was intended.
        ("’","’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Clean up a TOC description string; falsy input is returned unchanged.

    Kindle TOC descriptions won't render certain characters, so HTML entities
    are converted via BeautifulStoneSoup and the result is passed through
    ``fixChars``.
    """
    if not description:
        return description
    parsed = BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    massaged = unicode(parsed)
    # Replace '&' with '&'
    # NOTE(review): as written this substitution replaces '&' with itself --
    # confirm the intended replacement text.
    massaged = re.sub("&","&", massaged)
    return self.fixChars(massaged)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and a fallback summary to ``article``.

    When ``first`` is true, the first ``img`` inside ``body`` (if any) is
    registered as the TOC thumbnail, with any ``links\\linkN\\`` prefix
    removed from its ``src``.  When the article's text summary is blank, the
    page's ``og:description`` meta tag (if present) supplies the summary.
    """
    if first:
        first_image = soup.find('body').find('img')
        if first_image is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',first_image['src'])
            self.add_toc_thumbnail(article, thumb_src)
    if article.text_summary.strip():
        # A non-blank summary already exists; leave it alone.
        return
    og_desc = soup.find('meta',attrs={'property':'og:description'})
    if og_desc is not None:
        article.summary = article.text_summary = og_desc['content']
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace every image-less ``a`` tag found under any tag with its contents.

    Anchors that contain an ``img`` are left in place.  The contents are
    rendered and decoded as cp1252 (undecodable bytes replaced) before being
    substituted for the anchor.  Returns the same ``soup``.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Pre-process a downloaded page by running it through ``strip_anchors``."""
    cleaned = self.strip_anchors(soup)
    return cleaned
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
20
recipes/resurgence.recipe
Normal file
20
recipes/resurgence.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheResurgence(BasicNewsRecipe):
    # Feed-based recipe for The Resurgence; article clean-up is left to
    # auto_cleanup.

    # Identification
    title = u'The Resurgence'
    __author__ = 'Peter Grungi'
    author = 'The Resurgence'
    publisher = 'The Resurgence'
    language = 'en'

    # Artwork: the site logo is used for both the cover and the masthead.
    cover_url = 'http://cdn.theresurgence.com/images/logo.png'
    masthead_url = 'http://cdn.theresurgence.com/images/logo.png'

    # Download limits and clean-up
    oldest_article = 7
    max_articles_per_feed = 10
    auto_cleanup = True

    feeds = [(u'The Resurgence', u'http://feeds.theresurgence.com/TheResurgence?format=xml')]
|
@ -10,6 +10,7 @@ class SHaber (BasicNewsRecipe):
|
||||
oldest_article =2
|
||||
max_articles_per_feed =100
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
#delay = 1
|
||||
#use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
@ -23,15 +24,15 @@ class SHaber (BasicNewsRecipe):
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
,'linearize_tables': True
|
||||
}
|
||||
}
|
||||
extra_css = ' .Haber-Baslik-Yazisi {font-weight: bold; font-size: 9px} .Haber-Ozet-Yazisi{ font-family:sans-serif;font-weight: normal;font-size: 11px } #Haber{ font-family:sans-serif;font-weight: normal;font-size: 9px }.KirmiziText{ font-weight: normal;font-size: 5px }' #.story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
|
||||
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['Haber-Baslik-Yazisi','Haber-Ozet-Yazisi']}),dict(name='div', attrs={'id':['ctl00_ContentPlaceHolder1_imagenew','Haber']})]#,dict(name='h6', attrs={'class':['KirmiziText',]}) dict(name='div', attrs={'id':['Haber']}),dict(name='div', attrs={'id':['gallery']})]
|
||||
#remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ],dict(name='h1', attrs={'class':['H1-Haber-DetayBasligi']}),dict(name='h4', attrs={'class':['BrownText']}) ,
|
||||
#keep_only_tags = [dict(name='div', attrs={'class':['Haber-Baslik-Yazisi','Haber-Ozet-Yazisi']}),dict(name='div', attrs={'id':['ctl00_ContentPlaceHolder1_imagenew','Haber']})]#,dict(name='h6', attrs={'class':['KirmiziText',]}) dict(name='div', attrs={'id':['Haber']}),dict(name='div', attrs={'id':['gallery']})]
|
||||
#remove_tags = [dict(name='div', attrs={'class':['Haber-Baslik-Yazisi']})]#attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ],dict(name='h1', attrs={'class':['H1-Haber-DetayBasligi']}),dict(name='h4', attrs={'class':['BrownText']}) ,
|
||||
|
||||
cover_img_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
|
||||
masthead_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
|
||||
cover_img_url = 'http://www.samanyoluhaber.com/include/logo.png'
|
||||
masthead_url = 'http://www.samanyoluhaber.com/include/logo.png'
|
||||
remove_empty_feeds= True
|
||||
#remove_attributes = ['width','height']
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,30 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
title = u'Saskatoon Star-Phoenix'
|
||||
url_prefix = 'http://www.thestarphoenix.com'
|
||||
description = u'News from Saskatoon, SK'
|
||||
fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -53,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self, soup):
    """Remove empty ``id`` attributes from ``div`` tags and return the soup.

    :param soup: parsed article document exposing ``findAll``.
    :return: the same ``soup`` object, modified in place.

    NOTE(review): this recipe class appears to define ``preprocess_html`` a
    second time further down (delegating to ``strip_anchors``); if so, that
    later definition replaces this one -- confirm which is intended.
    """
    # Delete empty id attributes -- they screw up the TOC for unknown reasons.
    for div in soup.findAll('div', attrs={'id': ''}):
        del div['id']
    # The hook must hand the document back; falling off the end returned None.
    return soup
|
||||
def get_cover_url(self):
    """Return the URL of the newest reachable front-page image, or None.

    The image URL on webmedia.newseum.org is built from the day of the month
    and ``self.fp_tag``.  Today's image is tried first, then each of the six
    previous days.  Returns None straight away when ``self.fp_tag`` is empty,
    and logs "Cover unavailable" and returns None when no attempt succeeds.
    """
    from datetime import timedelta, date
    if self.fp_tag == '':
        return None
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Narrower than a bare except: lets KeyboardInterrupt/SystemExit
            # propagate while still treating any fetch failure as "try the
            # previous day".
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self,string):
    """Return ``string`` with cp1252-style punctuation code points replaced.

    Each (pattern, replacement) pair below is applied with ``re.sub`` in the
    order listed.
    """
    substitutions = (
        ("\x91","‘"),  # lsquo
        ("\x92","’"),  # rsquo
        ("\x93","“"),  # ldquo
        ("\x94","”"),  # rdquo
        ("\x96","–"),  # ndash
        ("\x97","—"),  # mdash
        # NOTE(review): pattern and replacement are identical as the code
        # stands, so this pass changes nothing -- confirm what was intended.
        ("’","’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Clean up a TOC description string; falsy input is returned unchanged.

    Kindle TOC descriptions won't render certain characters, so HTML entities
    are converted via BeautifulStoneSoup and the result is passed through
    ``fixChars``.
    """
    if not description:
        return description
    parsed = BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    massaged = unicode(parsed)
    # Replace '&' with '&'
    # NOTE(review): as written this substitution replaces '&' with itself --
    # confirm the intended replacement text.
    massaged = re.sub("&","&", massaged)
    return self.fixChars(massaged)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and a fallback summary to ``article``.

    When ``first`` is true, the first ``img`` inside ``body`` (if any) is
    registered as the TOC thumbnail, with any ``links\\linkN\\`` prefix
    removed from its ``src``.  When the article's text summary is blank, the
    page's ``og:description`` meta tag (if present) supplies the summary.
    """
    if first:
        first_image = soup.find('body').find('img')
        if first_image is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',first_image['src'])
            self.add_toc_thumbnail(article, thumb_src)
    if article.text_summary.strip():
        # A non-blank summary already exists; leave it alone.
        return
    og_desc = soup.find('meta',attrs={'property':'og:description'})
    if og_desc is not None:
        article.summary = article.text_summary = og_desc['content']
|
||||
|
||||
def strip_anchors(self,soup):
    """Replace every image-less ``a`` tag found under any tag with its contents.

    Anchors that contain an ``img`` are left in place.  The contents are
    rendered and decoded as cp1252 (undecodable bytes replaced) before being
    substituted for the anchor.  Returns the same ``soup``.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Pre-process a downloaded page by running it through ``strip_anchors``."""
    cleaned = self.strip_anchors(soup)
    return cleaned
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
133
recipes/strange_horizons.recipe
Normal file
133
recipes/strange_horizons.recipe
Normal file
@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import urlparse
|
||||
from collections import OrderedDict
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class StrangeHorizons(BasicNewsRecipe):
|
||||
|
||||
# Recipe metadata
|
||||
# Any issue archive page is an acceptable index as well.
|
||||
# However, reviews will not be included in older issues.
|
||||
# (Using the reviews archive instead of the recent reviews page would fix.)
|
||||
INDEX = 'http://www.strangehorizons.com/'
|
||||
title = 'Strange Horizons'
|
||||
description = 'A magazine of speculative fiction and related nonfiction. Best downloaded on weekends'
|
||||
masthead_url = 'http://strangehorizons.com/images/sh_head.gif'
|
||||
publication_type = 'magazine'
|
||||
language = 'en'
|
||||
__author__ = 'Jim DeVona'
|
||||
__version__ = '1.0'
|
||||
|
||||
# Cruft filters
|
||||
keep_only_tags = [dict(name='div', id='content')]
|
||||
remove_tags = [dict(name='p', attrs={'class': 'forum-links'}), dict(name='p', attrs={'class': 'top-link'})]
|
||||
remove_tags_after = [dict(name='p', attrs={'class': 'author-bio'})]
|
||||
|
||||
# Styles
|
||||
no_stylesheets = True
|
||||
extra_css = '''div.image-left { margin: 0.5em auto 1em auto; } div.image-right { margin: 0.5em auto 1em auto; } div.illustration { margin: 0.5em auto 1em auto; text-align: center; } p.image-caption { margin-top: 0.25em; margin-bottom: 1em; font-size: 75%; text-align: center; } h1 { font-size: 160%; } h2 { font-size: 110%; } h3 { font-size: 85%; } h4 { font-size: 80%; } p { font-size: 90%; margin: 1em 1em 1em 15px; } p.author-bio { font-size: 75%; font-style: italic; margin: 1em 1em 1em 15px; } p.author-bio i, p.author-bio cite, p.author-bio .foreign { font-style: normal; } p.author-copyright { font-size: 75%; text-align: center; margin: 3em 1em 1em 15px; } p.content-date { font-weight: bold; } p.dedication { font-style: italic; } div.stanza { margin-bottom: 1em; } div.stanza p { margin: 0px 1em 0px 15px; font-size: 90%; } p.verse-line { margin-bottom: 0px; margin-top: 0px; } p.verse-line-indent-1 { margin-bottom: 0px; margin-top: 0px; text-indent: 2em; } p.verse-line-indent-2 { margin-bottom: 0px; margin-top: 0px; text-indent: 4em; } p.verse-stanza-break { margin-bottom: 0px; margin-top: 0px; } .foreign { font-style: italic; } .thought { font-style: italic; } .thought cite { font-style: normal; } .thought em { font-style: normal; } blockquote { font-size: 90%; font-style: italic; } blockquote cite { font-style: normal; } blockquote em { font-style: normal; } blockquote .foreign { font-style: normal; } blockquote .thought { font-style: normal; } .speaker { font-weight: bold; } pre { margin-left: 15px; } div.screenplay { font-family: monospace; } blockquote.screenplay-dialogue { font-style: normal; font-size: 100%; } .screenplay p.dialogue-first { margin-top: 0; } .screenplay p.speaker { margin-bottom: 0; text-align: center; font-weight: normal; } blockquote.typed-letter { font-style: normal; font-size: 100%; font-family: monospace; } .no-italics { font-style: normal; }'''
|
||||
|
||||
def parse_index(self):
    """Build the table of contents for the current Strange Horizons issue.

    Reads the index page at ``self.INDEX``, appends the issue date to
    ``self.title`` and walks the h2/h3 headings that follow the first h2,
    stopping at the next h2.  Headings whose category is 'Review' are
    resolved against the recent-reviews page because the index does not
    link to them; every other heading is read directly from the index.

    Returns ``sections.items()``: (section name, article list) pairs in
    first-seen order, followed by a fixed 'About' section.  Each article
    is a dict with 'title', 'author', 'url', 'description' and 'date'.
    """
    reviews_url = 'http://www.strangehorizons.com/reviews/'

    sections = OrderedDict()
    strange_soup = self.index_to_soup(self.INDEX)

    # Find the heading that marks the start of this issue.
    issue_heading = strange_soup.find('h2')
    issue_date = self.tag_to_string(issue_heading)
    self.title = self.title + " - " + issue_date

    # Examine subsequent headings for information about this issue.
    heading_tag = issue_heading.findNextSibling(['h2', 'h3'])
    while heading_tag is not None:

        # An h2 indicates the start of the next issue.
        if heading_tag.name == 'h2':
            break

        # The heading begins with a word indicating the article category.
        section = self.tag_to_string(heading_tag).split(':', 1)[0].title()

        # Reviews aren't linked from the index, so we need to look them up
        # separately. Currently using Recent Reviews page. The reviews
        # archive page lists all reviews, but is >500k.
        if section == 'Review':

            # Get the list of recent reviews.
            review_soup = self.index_to_soup(reviews_url)
            review_titles = review_soup.findAll('p', attrs={'class': 'contents-title'})

            # Get the list of reviews included in this issue. (Kludgey.)
            # Each <br> becomes a '----' marker so the lines can be split
            # apart after the markup has been flattened to text.
            reviews_summary = heading_tag.findNextSibling('p', attrs={'class': 'contents-pullquote'})
            review_lines = []
            if reviews_summary is not None:
                for br in reviews_summary.findAll('br'):
                    br.replaceWith('----')
                review_summary_text = self.tag_to_string(reviews_summary)
                review_lines = review_summary_text.split(' ----')

            # Look for each of the needed reviews (there are 3, right?)...
            for review_info in review_lines[0:3]:

                # Get the review's release day (unused), title, and author.
                # A line that does not follow the expected
                # "<day>: <title>, reviewed by <author>" shape is logged
                # and skipped instead of aborting the whole download.
                try:
                    _day, tna = review_info.split(': ', 1)
                    article_title, article_author = tna.split(', reviewed by ')
                except ValueError:
                    self.log("Could not parse review entry:", review_info)
                    continue

                # ... in the list of recent reviews.
                for review_title_tag in review_titles:
                    review_title = self.tag_to_string(review_title_tag)
                    if review_title != article_title:
                        continue

                    # Extract review information from heading and surrounding text.
                    article_summary = self.tag_to_string(review_title_tag.findNextSibling('p', attrs={'class': 'contents-pullquote'}))
                    review_date = self.tag_to_string(review_title_tag.findNextSibling('p', attrs={'class': 'contents-date'}))
                    # Joined against the reviews page, mirroring the
                    # urljoin used for ordinary articles; an absolute
                    # href passes through unchanged.
                    article_url = urlparse.urljoin(reviews_url, review_title_tag.find('a')['href'])

                    # Add this review to the Review section.
                    sections.setdefault(section, []).append({
                        'title': article_title,
                        'author': article_author,
                        'url': article_url,
                        'description': article_summary,
                        'date': review_date})

                    break

                else:
                    # Try http://www.strangehorizons.com/reviews/archives.shtml
                    self.log("Review not found in Recent Reviews:", article_title)

        else:

            # Extract article information from the heading and surrounding text.
            link = heading_tag.find('a')
            article_title = self.tag_to_string(link)
            article_url = urlparse.urljoin(self.INDEX, link['href'])
            article_author = link.nextSibling.replace(', by ', '')
            article_summary = self.tag_to_string(heading_tag.findNextSibling('p', attrs={'class': 'contents-pullquote'}))

            # Add article to the appropriate collection of sections.
            sections.setdefault(section, []).append({
                'title': article_title,
                'author': article_author,
                'url': article_url,
                'description': article_summary,
                'date': issue_date})

        heading_tag = heading_tag.findNextSibling(['h2', 'h3'])

    # Manually insert standard info about the magazine.
    sections['About'] = [{
        'title': 'Strange Horizons',
        'author': 'Niall Harrison, Editor-in-Chief',
        'url': 'http://www.strangehorizons.com/AboutUs.shtml',
        'description': 'Strange Horizons is a magazine of and about speculative fiction and related nonfiction. Speculative fiction includes science fiction, fantasy, horror, slipstream, and all other flavors of fantastika. Work published in Strange Horizons has been shortlisted for or won Hugo, Nebula, Rhysling, Theodore Sturgeon, James Tiptree Jr., and World Fantasy Awards.',
        'date': ''}]

    return sections.items()
|
||||
|
@ -1,24 +1,41 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
## History:
|
||||
## 1: Base Version
|
||||
## 2: Added rules for wdr.de, ndr.de, br-online.de
|
||||
## 3: Added rules for rbb-online.de, boerse.ard.de, sportschau.de
|
||||
|
||||
class Tagesschau(BasicNewsRecipe):
|
||||
title = 'Tagesschau'
|
||||
description = 'Nachrichten der ARD'
|
||||
publisher = 'ARD'
|
||||
language = 'de'
|
||||
version = 3
|
||||
|
||||
__author__ = 'Florian Andreas Pfaff'
|
||||
oldest_article = 7
|
||||
__author__ = 'Florian Andreas Pfaff, a.peter'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio']}),
|
||||
dict(name='div',
|
||||
attrs={'id':['socialBookmarks','seitenanfang']}),
|
||||
dict(name='ul',
|
||||
attrs={'class':['directLinks','directLinks weltatlas']}),
|
||||
dict(name='strong', attrs={'class':['boxTitle inv','inv']})
|
||||
dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio','infobox ','footer clearfix','inner recommendations','teaser teaser-08 nachrichten smallstandard','infobox-rechts','infobox-links','csl2','teaserBox metaBlock','articleA archiveDisclaimer']}),
|
||||
dict(name='div', attrs={'id':['pageFunctions']}), ## wdr.de
|
||||
dict(name='div', attrs={'class':['chart','footerService','toplink','assetsLeft','assetsFullsize']}), ## boerse.ard.de
|
||||
dict(name='div', attrs={'class':['ardMehrZumThemaLinks','socialBookmarks','ardContentEnd','ardDisclaimer']}), ## sportschau.de
|
||||
dict(name='div', attrs={'id':['socialBookmarks','seitenanfang','comment']}),
|
||||
dict(name='ul', attrs={'class':['directLinks','directLinks weltatlas','iconList','right']}),
|
||||
dict(name='strong', attrs={'class':['boxTitle inv','inv']}),
|
||||
dict(name='div', attrs={'class':['moreInfo right','moreInfo']}),
|
||||
dict(name='span', attrs={'class':['videoLink']}),
|
||||
dict(name='img', attrs={'class':['zoom float_right']}),
|
||||
dict(name='a', attrs={'id':['zoom']})
|
||||
]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'centerCol'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'centerCol'}),
|
||||
dict(name='div', attrs={'id':['mainColumn','ardContent']}),
|
||||
dict(name='div', attrs={'class':['narrow clearfix','beitrag','detail_inlay','containerArticle noBorder','span-8']})]
|
||||
|
||||
def get_masthead_url(self):
|
||||
return 'http://intern.tagesschau.de/html/img/image.jpg'
|
||||
|
||||
|
37
recipes/telegraph_in.recipe
Normal file
37
recipes/telegraph_in.recipe
Normal file
@ -0,0 +1,37 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Telegraph(BasicNewsRecipe):
    """Recipe for The Telegraph India, assembled from the site's RSS feeds."""

    # Identification.
    title = u'The Telegraph India'
    __author__ = 'Krittika Goyal'
    language = 'en_IN'

    # Download limits.
    oldest_article = 1  # days
    max_articles_per_feed = 25

    # Content handling switches.
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    # (section title, RSS feed URL) pairs, in display order.
    feeds = [
        ('Front Page', 'http://www.telegraphindia.com/feeds/rss.jsp?id=3'),
        ('Nation', 'http://www.telegraphindia.com/feeds/rss.jsp?id=4'),
        ('Calcutta', 'http://www.telegraphindia.com/feeds/rss.jsp?id=5'),
        ('Bengal', 'http://www.telegraphindia.com/feeds/rss.jsp?id=8'),
        ('Bihar', 'http://www.telegraphindia.com/feeds/rss.jsp?id=22'),
        ('Sports', 'http://www.telegraphindia.com/feeds/rss.jsp?id=7'),
        ('International', 'http://www.telegraphindia.com/feeds/rss.jsp?id=13'),
        ('Business', 'http://www.telegraphindia.com/feeds/rss.jsp?id=9'),
        ('Entertainment', 'http://www.telegraphindia.com/feeds/rss.jsp?id=20'),
        ('Opinion', 'http://www.telegraphindia.com/feeds/rss.jsp?id=6'),
    ]
|
||||
|
220
recipes/vancouver_province.recipe
Normal file
220
recipes/vancouver_province.recipe
Normal file
@ -0,0 +1,220 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
'''
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the "today's paper" index of one www.canada.com newspaper.

    Exactly one group of title / url_prefix / description / fp_tag settings
    below is active; the other groups are kept commented out as ready-made
    alternatives.  An empty fp_tag disables the cover lookup.
    """

    # un-comment the following four lines for the Victoria Times Colonist
    ## title = u'Victoria Times Colonist'
    ## url_prefix = 'http://www.timescolonist.com'
    ## description = u'News from Victoria, BC'
    ## fp_tag = 'CAN_TC'

    # un-comment the following four lines for the Vancouver Province
    title = u'Vancouver Province'
    url_prefix = 'http://www.theprovince.com'
    description = u'News from Vancouver, BC'
    fp_tag = 'CAN_VP'

    # un-comment the following four lines for the Vancouver Sun
    ## title = u'Vancouver Sun'
    ## url_prefix = 'http://www.vancouversun.com'
    ## description = u'News from Vancouver, BC'
    ## fp_tag = 'CAN_VS'

    # un-comment the following four lines for the Edmonton Journal
    ## title = u'Edmonton Journal'
    ## url_prefix = 'http://www.edmontonjournal.com'
    ## description = u'News from Edmonton, AB'
    ## fp_tag = 'CAN_EJ'

    # un-comment the following four lines for the Calgary Herald
    ## title = u'Calgary Herald'
    ## url_prefix = 'http://www.calgaryherald.com'
    ## description = u'News from Calgary, AB'
    ## fp_tag = 'CAN_CH'

    # un-comment the following four lines for the Regina Leader-Post
    ## title = u'Regina Leader-Post'
    ## url_prefix = 'http://www.leaderpost.com'
    ## description = u'News from Regina, SK'
    ## fp_tag = ''

    # un-comment the following four lines for the Saskatoon Star-Phoenix
    ## title = u'Saskatoon Star-Phoenix'
    ## url_prefix = 'http://www.thestarphoenix.com'
    ## description = u'News from Saskatoon, SK'
    ## fp_tag = ''

    # un-comment the following four lines for the Windsor Star
    ## title = u'Windsor Star'
    ## url_prefix = 'http://www.windsorstar.com'
    ## description = u'News from Windsor, ON'
    ## fp_tag = 'CAN_'

    # un-comment the following four lines for the Ottawa Citizen
    ## title = u'Ottawa Citizen'
    ## url_prefix = 'http://www.ottawacitizen.com'
    ## description = u'News from Ottawa, ON'
    ## fp_tag = 'CAN_OC'

    # un-comment the following four lines for the Montreal Gazette
    ## title = u'Montreal Gazette'
    ## url_prefix = 'http://www.montrealgazette.com'
    ## description = u'News from Montreal, QC'
    ## fp_tag = 'CAN_MG'

    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp { font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large; font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

    def get_cover_url(self):
        """Return the URL of the newest front-page image that opens, or None.

        Returns None immediately when ``fp_tag`` is empty.  Otherwise the
        Newseum URL for today's day-of-month is probed first, then the URL
        for each of the six preceding days; when none can be opened,
        "Cover unavailable" is logged and None is returned.
        """
        from datetime import timedelta, date
        if self.fp_tag == '':
            return None
        br = BasicNewsRecipe.get_browser()
        today = date.today()
        for daysback in range(7):
            day = (today - timedelta(days=daysback)).day
            cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
            try:
                br.open(cover)
            except Exception:
                # Any failure to open means "no image for that day"; a bare
                # except here would also swallow KeyboardInterrupt.
                continue
            return cover
        self.log("\nCover unavailable")
        return None

    def fixChars(self, string):
        """Return *string* with the listed punctuation stand-ins replaced."""
        substitutions = (
            ("\x91", "‘"),  # lsquo
            ("\x92", "’"),  # rsquo
            ("\x93", "“"),  # ldquo
            ("\x94", "”"),  # rdquo
            ("\x96", "–"),  # ndash
            ("\x97", "—"),  # mdash
            # NOTE(review): as extracted, this pattern was identical to its
            # replacement (a no-op); presumably the numeric entity for the
            # right single quote was intended - confirm against upstream.
            ("&#x2019;", "’"),
        )
        fixed = string
        for pattern, replacement in substitutions:
            fixed = re.sub(pattern, replacement, fixed)
        return fixed

    def massageNCXText(self, description):
        """Return *description* with entities converted and characters fixed.

        A falsy description is returned unchanged.
        """
        # Kindle TOC descriptions won't render certain characters
        if not description:
            return description
        massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
        # NOTE(review): pattern and replacement are both "&" as written, so
        # this call changes nothing; the intended entity form is unclear.
        massaged = re.sub("&","&", massaged)
        return self.fixChars(massaged)

    def populate_article_metadata(self, article, soup, first):
        """Attach a TOC thumbnail and, when missing, a summary to *article*.

        For the first page, the first img under body is registered as the
        thumbnail.  When the article's text summary is blank, the content of
        the og:description meta tag is used for both summary fields.
        """
        if first:
            picdiv = soup.find('body').find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
        xtitle = article.text_summary.strip()
        if not xtitle:
            desc = soup.find('meta',attrs={'property':'og:description'})
            if desc is not None:
                article.summary = article.text_summary = desc['content']

    def strip_anchors(self, soup):
        """Replace every anchor that has no img with its rendered contents."""
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):
        """Return *soup* after strip_anchors has been applied to it."""
        return self.strip_anchors(soup)

    def parse_index(self):
        """Build the section/article index from the "today's paper" page.

        Returns a list of (section name, article list) pairs in the order
        the sections were seen; sections without articles are omitted.
        Articles found before any section title go under 'News'.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        key = 'News'
        ans = ['News']

        # Find each div with class "section_title02" or "featurecontent"
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                ans.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            title = self.tag_to_string(atag,False)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        ans = [(key, articles[key]) for key in ans if key in articles]
        return ans
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,50 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Vancouver Sun
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
## title = u'Victoria Times Colonist'
|
||||
## url_prefix = 'http://www.timescolonist.com'
|
||||
## description = u'News from Victoria, BC'
|
||||
## fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
title = u'Vancouver Sun'
|
||||
url_prefix = 'http://www.vancouversun.com'
|
||||
description = u'News from Vancouver, BC'
|
||||
fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following three lines for the Edmonton Journal
|
||||
#title = u'Edmonton Journal'
|
||||
#url_prefix = 'http://www.edmontonjournal.com'
|
||||
#description = u'News from Edmonton, AB'
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following three lines for the Calgary Herald
|
||||
#title = u'Calgary Herald'
|
||||
#url_prefix = 'http://www.calgaryherald.com'
|
||||
#description = u'News from Calgary, AB'
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following three lines for the Regina Leader-Post
|
||||
#title = u'Regina Leader-Post'
|
||||
#url_prefix = 'http://www.leaderpost.com'
|
||||
#description = u'News from Regina, SK'
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
#title = u'Saskatoon Star-Phoenix'
|
||||
#url_prefix = 'http://www.thestarphoenix.com'
|
||||
#description = u'News from Saskatoon, SK'
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -73,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
#delete iempty id attributes--they screw up the TOC for unknow reasons
|
||||
divtags = soup.findAll('div',attrs={'id':''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del(div['id'])
|
||||
def get_cover_url(self):
    """Return the URL of the newest front-page image that opens, or None.

    Returns None immediately when ``fp_tag`` is empty.  Otherwise the
    Newseum URL for today's day-of-month is probed first, then the URL for
    each of the six preceding days; when none can be opened,
    "Cover unavailable" is logged and None is returned.
    """
    from datetime import timedelta, date
    if self.fp_tag == '':
        return None
    br = BasicNewsRecipe.get_browser()
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        try:
            br.open(cover)
        except Exception:
            # Any failure to open means "no image for that day"; a bare
            # except here would also swallow KeyboardInterrupt.
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self, string):
    """Return *string* with the listed punctuation stand-ins replaced."""
    substitutions = (
        ("\x91", "‘"),  # lsquo
        ("\x92", "’"),  # rsquo
        ("\x93", "“"),  # ldquo
        ("\x94", "”"),  # rdquo
        ("\x96", "–"),  # ndash
        ("\x97", "—"),  # mdash
        # NOTE(review): as extracted, this pattern was identical to its
        # replacement (a no-op); presumably the numeric entity for the
        # right single quote was intended - confirm against upstream.
        ("&#x2019;", "’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Return *description* with entities converted and characters fixed.

    A falsy description is returned unchanged.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    decoded = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # NOTE(review): pattern and replacement are both "&" as written, so this
    # call changes nothing; the intended entity form is unclear.
    decoded = re.sub("&","&", decoded)
    return self.fixChars(decoded)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and, when missing, a summary to *article*.

    For the first page, the first img under body is registered as the
    thumbnail.  When the article's text summary is blank, the content of
    the og:description meta tag is used for both summary fields.
    """
    if first:
        first_image = soup.find('body').find('img')
        if first_image is not None:
            thumb_src = re.sub(r'links\\link\d+\\','',first_image['src'])
            self.add_toc_thumbnail(article, thumb_src)
    if len(article.text_summary.strip()) != 0:
        return
    og_description = soup.find('meta',attrs={'property':'og:description'})
    if og_description is not None:
        article.summary = article.text_summary = og_description['content']
|
||||
|
||||
def strip_anchors(self, soup):
    """Replace every anchor that has no img with its rendered contents.

    Returns the same *soup* object after modifying it in place.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                # Anchors wrapping an image are kept as they are.
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Return *soup* after strip_anchors has been applied to it."""
    cleaned = self.strip_anchors(soup)
    return cleaned
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
@ -6,60 +7,72 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Victoria Times Colonist
|
||||
# un-comment the following four lines for the Victoria Times Colonist
|
||||
title = u'Victoria Times Colonist'
|
||||
url_prefix = 'http://www.timescolonist.com'
|
||||
description = u'News from Victoria, BC'
|
||||
fp_tag = 'CAN_TC'
|
||||
|
||||
# un-comment the following three lines for the Vancouver Province
|
||||
#title = u'Vancouver Province'
|
||||
#url_prefix = 'http://www.theprovince.com'
|
||||
#description = u'News from Vancouver, BC'
|
||||
# un-comment the following four lines for the Vancouver Province
|
||||
## title = u'Vancouver Province'
|
||||
## url_prefix = 'http://www.theprovince.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VP'
|
||||
|
||||
# un-comment the following three lines for the Vancouver Sun
|
||||
#title = u'Vancouver Sun'
|
||||
#url_prefix = 'http://www.vancouversun.com'
|
||||
#description = u'News from Vancouver, BC'
|
||||
# un-comment the following four lines for the Vancouver Sun
|
||||
## title = u'Vancouver Sun'
|
||||
## url_prefix = 'http://www.vancouversun.com'
|
||||
## description = u'News from Vancouver, BC'
|
||||
## fp_tag = 'CAN_VS'
|
||||
|
||||
# un-comment the following three lines for the Edmonton Journal
|
||||
#title = u'Edmonton Journal'
|
||||
#url_prefix = 'http://www.edmontonjournal.com'
|
||||
#description = u'News from Edmonton, AB'
|
||||
# un-comment the following four lines for the Edmonton Journal
|
||||
## title = u'Edmonton Journal'
|
||||
## url_prefix = 'http://www.edmontonjournal.com'
|
||||
## description = u'News from Edmonton, AB'
|
||||
## fp_tag = 'CAN_EJ'
|
||||
|
||||
# un-comment the following three lines for the Calgary Herald
|
||||
#title = u'Calgary Herald'
|
||||
#url_prefix = 'http://www.calgaryherald.com'
|
||||
#description = u'News from Calgary, AB'
|
||||
# un-comment the following four lines for the Calgary Herald
|
||||
## title = u'Calgary Herald'
|
||||
## url_prefix = 'http://www.calgaryherald.com'
|
||||
## description = u'News from Calgary, AB'
|
||||
## fp_tag = 'CAN_CH'
|
||||
|
||||
# un-comment the following three lines for the Regina Leader-Post
|
||||
#title = u'Regina Leader-Post'
|
||||
#url_prefix = 'http://www.leaderpost.com'
|
||||
#description = u'News from Regina, SK'
|
||||
# un-comment the following four lines for the Regina Leader-Post
|
||||
## title = u'Regina Leader-Post'
|
||||
## url_prefix = 'http://www.leaderpost.com'
|
||||
## description = u'News from Regina, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Saskatoon Star-Phoenix
|
||||
#title = u'Saskatoon Star-Phoenix'
|
||||
#url_prefix = 'http://www.thestarphoenix.com'
|
||||
#description = u'News from Saskatoon, SK'
|
||||
# un-comment the following four lines for the Saskatoon Star-Phoenix
|
||||
## title = u'Saskatoon Star-Phoenix'
|
||||
## url_prefix = 'http://www.thestarphoenix.com'
|
||||
## description = u'News from Saskatoon, SK'
|
||||
## fp_tag = ''
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
#title = u'Windsor Star'
|
||||
#url_prefix = 'http://www.windsorstar.com'
|
||||
#description = u'News from Windsor, ON'
|
||||
# un-comment the following four lines for the Windsor Star
|
||||
## title = u'Windsor Star'
|
||||
## url_prefix = 'http://www.windsorstar.com'
|
||||
## description = u'News from Windsor, ON'
|
||||
## fp_tag = 'CAN_'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
#title = u'Ottawa Citizen'
|
||||
#url_prefix = 'http://www.ottawacitizen.com'
|
||||
#description = u'News from Ottawa, ON'
|
||||
# un-comment the following four lines for the Ottawa Citizen
|
||||
## title = u'Ottawa Citizen'
|
||||
## url_prefix = 'http://www.ottawacitizen.com'
|
||||
## description = u'News from Ottawa, ON'
|
||||
## fp_tag = 'CAN_OC'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
#title = u'Montreal Gazette'
|
||||
#url_prefix = 'http://www.montrealgazette.com'
|
||||
#description = u'News from Montreal, QC'
|
||||
# un-comment the following four lines for the Montreal Gazette
|
||||
## title = u'Montreal Gazette'
|
||||
## url_prefix = 'http://www.montrealgazette.com'
|
||||
## description = u'News from Montreal, QC'
|
||||
## fp_tag = 'CAN_MG'
|
||||
|
||||
|
||||
language = 'en_CA'
|
||||
@ -83,14 +96,80 @@ class CanWestPaper(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
#delete iempty id attributes--they screw up the TOC for unknow reasons
|
||||
divtags = soup.findAll('div',attrs={'id':''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del(div['id'])
|
||||
def get_cover_url(self):
    """Return the URL of the newest front-page image that opens, or None.

    Returns None immediately when ``fp_tag`` is empty.  Otherwise the
    Newseum URL for today's day-of-month is probed first, then the URL for
    each of the six preceding days; when none can be opened,
    "Cover unavailable" is logged and None is returned.
    """
    from datetime import timedelta, date
    if self.fp_tag == '':
        return None
    br = BasicNewsRecipe.get_browser()
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        try:
            br.open(cover)
        except Exception:
            # Any failure to open means "no image for that day"; a bare
            # except here would also swallow KeyboardInterrupt.
            continue
        return cover
    self.log("\nCover unavailable")
    return None
|
||||
|
||||
def fixChars(self, string):
    """Return *string* with the listed punctuation stand-ins replaced."""
    substitutions = (
        ("\x91", "‘"),  # lsquo
        ("\x92", "’"),  # rsquo
        ("\x93", "“"),  # ldquo
        ("\x94", "”"),  # rdquo
        ("\x96", "–"),  # ndash
        ("\x97", "—"),  # mdash
        # NOTE(review): as extracted, this pattern was identical to its
        # replacement (a no-op); presumably the numeric entity for the
        # right single quote was intended - confirm against upstream.
        ("&#x2019;", "’"),
    )
    fixed = string
    for pattern, replacement in substitutions:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
|
||||
|
||||
def massageNCXText(self, description):
    """Return *description* with entities converted and characters fixed.

    A falsy description is returned unchanged.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    decoded = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # NOTE(review): pattern and replacement are both "&" as written, so this
    # call changes nothing; the intended entity form is unclear.
    decoded = re.sub("&","&", decoded)
    return self.fixChars(decoded)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and a fallback summary to ``article``.

    For the first page, the first ``<img>`` under ``<body>`` (if any) is
    registered as the article's TOC thumbnail. When the article's text
    summary is blank, the page's ``og:description`` meta tag is used instead.
    """
    if first:
        image = soup.find('body').find('img')
        if image is not None:
            thumb_src = re.sub(r'links\\link\d+\\', '', image['src'])
            self.add_toc_thumbnail(article, thumb_src)
    if article.text_summary.strip():
        # A non-blank summary already exists; leave it alone.
        return
    meta = soup.find('meta', attrs={'property': 'og:description'})
    if meta is not None:
        article.summary = article.text_summary = meta['content']
|
||||
|
||||
def strip_anchors(self, soup):
    """Replace every ``<a>`` that wraps no image with its rendered contents.

    Anchors containing an ``<img>`` are kept. Returns the same soup object.
    """
    for element in soup.findAll(True):
        for anchor in element.findAll('a'):
            if anchor.img is not None:
                continue
            inner = anchor.renderContents().decode('cp1252', 'replace')
            anchor.replaceWith(inner)
    return soup
|
||||
|
||||
def preprocess_html(self, soup):
    """Return the soup after passing it through ``strip_anchors``."""
    cleaned = self.strip_anchors(soup)
    return cleaned
|
||||
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
|
||||
|
@ -1,115 +0,0 @@
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Volkskrant_full(BasicNewsRecipe):
    # This recipe will download the Volkskrant newspaper,
    # from the subscribers site. It requires a password.
    # Known issues are: articles that are spread out over
    # multiple pages will appear multiple times. Pages
    # that contain only adverts will appear, but empty.
    # The supplement 'Volkskrant Magazine' on saturday
    # is currently not downloaded.
    # You can set a manual date, to download an archived
    # newspaper. Volkskrant stores over a month at the
    # moment of writing. To do so I suggest you unmark
    # the date on the line below, and insert it in the title. Then
    # follow the instructions marked further below.

    title = 'De Volkskrant (subscription)' # [za, 13 nov 2010]'
    __author__ = u'Selcal'
    description = u"Volkskrant"
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'nl'
    use_embedded_content = False
    simultaneous_downloads = 1
    delay = 1
    needs_subscription = True
    # Set RETRIEVEDATE to 'yyyymmdd' to load an older
    # edition. Otherwise keep '%Y%m%d'
    # When setting a manual date, unmark and add the date
    # to the title above, and unmark the timefmt line to stop
    # Calibre from adding today's date in addition.

    # timefmt = ''
    RETRIEVEDATE = strftime('%Y%m%d')
    INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text'
    INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/'
    LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do'
    remove_tags = [dict(name='address')]
    cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg'

    def get_browser(self):
        """Return a browser, logged in via the LOGIN page when credentials are set."""
        br = BasicNewsRecipe.get_browser()

        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr = 0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def _load_soup(self, url, attempts=5):
        """Fetch ``url`` as soup, trying up to ``attempts`` times.

        Returns ``None`` when every attempt fails, so callers can decide
        what to do instead of continuing with an unbound or stale soup.
        """
        for _ in range(attempts):
            try:
                return self.index_to_soup(url)
            except Exception:
                self.log('(Retrying URL load) ' + url)
        return None

    def parse_index(self):
        """Build the list of ``(section title, articles)`` tuples for the paper.

        The main index supplies one ``<option>`` per page/section; each
        section page lists its articles as ``<area>`` elements, and each
        article page is a frameset whose first frame name yields the URL of
        the text-only version.

        :raises ValueError: when the main index cannot be loaded at all.
        """
        krant = []

        def strip_title(_title):
            # Keep only the part before the first colon; a title without a
            # colon is returned whole instead of running off the end.
            return _title.partition(':')[0]

        soup = self._load_soup(self.INDEX_MAIN)
        if soup is None:
            raise ValueError('Could not load main index: ' + self.INDEX_MAIN)
        mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
        for option in mainsoup.findAll('option'):
            articles = []
            _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
            _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
            soup = self._load_soup(_INDEX)
            if soup is None:
                # Do not parse the previous page's soup as if it were this section.
                self.log('Skipping section, index failed to load: ' + _INDEX)
                continue
            for item in soup.findAll('area'):
                art_nr = item['class']
                attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:]
                index_title = soup.find('div', attrs={'class': attrname})
                get_title = index_title['title']
                _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text'
                souparticle = self._load_soup(_ARTICLE)
                if souparticle is None:
                    self.log('Skipping article, page failed to load: ' + _ARTICLE)
                    continue
                # The first frame's src names the header page; dropping its
                # last 12 characters and appending '_text.html' gives the
                # text page of the same article.
                headerurl = souparticle.findAll('frame')[0]['src']
                url = _INDEX_ARTICLE + headerurl[:-12] + '_text.html'
                title = get_title
                if get_title != '':
                    title = strip_title(get_title)
                if title != '':
                    articles.append({
                        'title' :title
                        ,'date' :strftime(' %B %Y')
                        ,'url' :url
                        ,'description':''
                        })
            krant.append( (option.string, articles))
        return krant
|
||||
|
221
recipes/windsor_star.recipe
Normal file
221
recipes/windsor_star.recipe
Normal file
@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
'''
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
    """Recipe for the canada.com newspapers, configured here for the Windsor Star.

    Exactly one of the four-line configuration groups below should be
    un-commented; it selects the title, site prefix, description and the
    front-page tag used to build the cover URL.
    """

    # un-comment the following four lines for the Victoria Times Colonist
    ## title = u'Victoria Times Colonist'
    ## url_prefix = 'http://www.timescolonist.com'
    ## description = u'News from Victoria, BC'
    ## fp_tag = 'CAN_TC'

    # un-comment the following four lines for the Vancouver Province
    ## title = u'Vancouver Province'
    ## url_prefix = 'http://www.theprovince.com'
    ## description = u'News from Vancouver, BC'
    ## fp_tag = 'CAN_VP'

    # un-comment the following four lines for the Vancouver Sun
    ## title = u'Vancouver Sun'
    ## url_prefix = 'http://www.vancouversun.com'
    ## description = u'News from Vancouver, BC'
    ## fp_tag = 'CAN_VS'

    # un-comment the following four lines for the Edmonton Journal
    ## title = u'Edmonton Journal'
    ## url_prefix = 'http://www.edmontonjournal.com'
    ## description = u'News from Edmonton, AB'
    ## fp_tag = 'CAN_EJ'

    # un-comment the following four lines for the Calgary Herald
    ## title = u'Calgary Herald'
    ## url_prefix = 'http://www.calgaryherald.com'
    ## description = u'News from Calgary, AB'
    ## fp_tag = 'CAN_CH'

    # un-comment the following four lines for the Regina Leader-Post
    ## title = u'Regina Leader-Post'
    ## url_prefix = 'http://www.leaderpost.com'
    ## description = u'News from Regina, SK'
    ## fp_tag = ''

    # un-comment the following four lines for the Saskatoon Star-Phoenix
    ## title = u'Saskatoon Star-Phoenix'
    ## url_prefix = 'http://www.thestarphoenix.com'
    ## description = u'News from Saskatoon, SK'
    ## fp_tag = ''

    # un-comment the following four lines for the Windsor Star
    title = u'Windsor Star'
    url_prefix = 'http://www.windsorstar.com'
    description = u'News from Windsor, ON'
    # NOTE(review): unlike the other tags above, this one has no paper
    # suffix after 'CAN_' -- confirm the intended front-page tag.
    fp_tag = 'CAN_'

    # un-comment the following four lines for the Ottawa Citizen
    ## title = u'Ottawa Citizen'
    ## url_prefix = 'http://www.ottawacitizen.com'
    ## description = u'News from Ottawa, ON'
    ## fp_tag = 'CAN_OC'

    # un-comment the following four lines for the Montreal Gazette
    ## title = u'Montreal Gazette'
    ## url_prefix = 'http://www.montrealgazette.com'
    ## description = u'News from Montreal, QC'
    ## fp_tag = 'CAN_MG'


    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp { font-size:xx-small; display: block; }
                #storyheader { font-size: medium; }
                #storyheader h1 { font-size: x-large; }
                #storyheader h2 { font-size: large; font-style: italic; }
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
                   dict(name='div', attrs={'class':'rule_grey_solid'}),
                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

    def get_cover_url(self):
        """Return the URL of the most recent front-page image, or None.

        Today's image is tried first, then each of the previous six days;
        the first URL that opens without error is returned. ``None`` is
        returned when ``fp_tag`` is empty or nothing could be opened.
        """
        from datetime import timedelta, date
        if self.fp_tag == '':
            return None
        today = date.today()
        for daysback in range(7):
            day = (today - timedelta(days=daysback)).day
            cover = ('http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'
                     + str(day) + '/lg/' + self.fp_tag + '.jpg')
            br = BasicNewsRecipe.get_browser()
            try:
                br.open(cover)
            except Exception:
                # Not available for this day; fall back one more day.
                # (Exception, not a bare except, so Ctrl-C still interrupts.)
                continue
            return cover
        self.log("\nCover unavailable")
        return None

    def fixChars(self, string):
        """Replace stray Windows-1252 punctuation bytes with typographic characters."""
        replacements = (
            ("\x91", "‘"),       # lsquo
            ("\x92", "’"),       # rsquo
            ("\x93", "“"),       # ldquo
            ("\x94", "”"),       # rdquo
            ("\x96", "–"),       # ndash
            ("\x97", "—"),       # mdash
            # Numeric rsquo entity that survived entity conversion; the
            # pattern must be the entity text or the substitution is a no-op.
            ("&#x2019;", "’"),
        )
        fixed = string
        for pattern, replacement in replacements:
            fixed = re.sub(pattern, replacement, fixed)
        return fixed

    def massageNCXText(self, description):
        """Clean up an article description for the table of contents.

        Returns ``description`` unchanged when it is empty or ``None``.
        """
        # Kindle TOC descriptions won't render certain characters
        if not description:
            return description
        massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
        # Replace '&' with its numeric character reference; substituting
        # '&' with '&' would leave the text unchanged.
        massaged = re.sub("&", "&#38;", massaged)
        return self.fixChars(massaged)

    def populate_article_metadata(self, article, soup, first):
        """Attach a TOC thumbnail and a fallback summary to ``article``.

        On the first page the first ``<img>`` under ``<body>`` becomes the
        thumbnail; a blank text summary is replaced by the page's
        ``og:description`` meta content when present.
        """
        if first:
            picdiv = soup.find('body').find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
        xtitle = article.text_summary.strip()
        if not xtitle:
            desc = soup.find('meta',attrs={'property':'og:description'})
            if desc is not None:
                article.summary = article.text_summary = desc['content']

    def strip_anchors(self, soup):
        """Replace every ``<a>`` that wraps no image with its rendered contents."""
        for para in soup.findAll(True):
            for a in para.findAll('a'):
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):
        """Return the soup after passing it through ``strip_anchors``."""
        return self.strip_anchors(soup)

    def parse_index(self):
        """Parse the "today's paper" index into ``(section, articles)`` tuples.

        Section-title divs switch the current section; feature-content divs
        contribute one article to it. Sections that end up with no articles
        are left out of the result.
        """
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

        articles = {}
        key = 'News'
        ans = ['News']

        # Find each instance of class="section_title02", class="featurecontent"
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            #self.log(" div class = %s" % divtag['class'])
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                ans.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            #self.log("Section %s" % key)
            #self.log("url %s" % url)
            title = self.tag_to_string(atag,False)
            #self.log("title %s" % title)
            pubdate = ''
            description = ''
            ptag = divtag.find('p')
            if ptag:
                description = self.tag_to_string(ptag,False)
                #self.log("description %s" % description)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
                #self.log("author %s" % author)
            # `in` / setdefault instead of the deprecated dict.has_key()
            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        ans = [(section, articles[section]) for section in ans if section in articles]
        return ans
|
@ -408,6 +408,8 @@ def cli_parser():
|
||||
epilog=epilog)
|
||||
sf = subparsers.add_parser('sourceforge', help='Upload to sourceforge',
|
||||
epilog=epilog)
|
||||
cron = subparsers.add_parser('cron', help='Call script from cron')
|
||||
|
||||
a = gc.add_argument
|
||||
|
||||
a('project',
|
||||
@ -433,18 +435,25 @@ def cli_parser():
|
||||
a('username',
|
||||
help='Sourceforge username')
|
||||
|
||||
a = cron.add_argument
|
||||
a('username',
|
||||
help='Username to log into your google account')
|
||||
a('password',
|
||||
help='Password to log into your google account')
|
||||
|
||||
return p
|
||||
|
||||
def main(args=None):
|
||||
cli = cli_parser()
|
||||
args = cli.parse_args(args)
|
||||
files = {}
|
||||
with args.file_map as f:
|
||||
for line in f:
|
||||
fname, _, desc = line.partition(':')
|
||||
fname, desc = fname.strip(), desc.strip()
|
||||
if fname and desc:
|
||||
files[fname] = desc
|
||||
if args.service != 'cron':
|
||||
with args.file_map as f:
|
||||
for line in f:
|
||||
fname, _, desc = line.partition(':')
|
||||
fname, desc = fname.strip(), desc.strip()
|
||||
if fname and desc:
|
||||
files[fname] = desc
|
||||
|
||||
ofiles = OrderedDict()
|
||||
for x in sorted(files, key=lambda x:os.stat(x).st_size, reverse=True):
|
||||
@ -460,6 +469,8 @@ def main(args=None):
|
||||
sf = SourceForge(ofiles, args.project, args.version, args.username,
|
||||
replace=args.replace)
|
||||
sf()
|
||||
elif args.service == 'cron':
|
||||
login_to_google(args.username, args.password)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -156,9 +156,6 @@ class Develop(Command):
|
||||
self.warn('Failed to compile mount helper. Auto mounting of',
|
||||
' devices will not work')
|
||||
|
||||
if not isbsd and os.geteuid() != 0:
|
||||
return self.warn('Must be run as root to compile mount helper. Auto '
|
||||
'mounting of devices will not work.')
|
||||
src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
|
||||
dest = os.path.join(self.staging_bindir, 'calibre-mount-helper')
|
||||
self.info('Installing mount helper to '+ dest)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -12,14 +12,14 @@ msgstr ""
|
||||
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
|
||||
"devel@lists.alioth.debian.org>\n"
|
||||
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
|
||||
"PO-Revision-Date: 2011-11-03 23:08+0000\n"
|
||||
"PO-Revision-Date: 2012-02-01 20:12+0000\n"
|
||||
"Last-Translator: drMerry <Unknown>\n"
|
||||
"Language-Team: Dutch <vertaling@vrijschrift.org>\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"X-Launchpad-Export-Date: 2011-11-26 05:12+0000\n"
|
||||
"X-Generator: Launchpad (build 14381)\n"
|
||||
"X-Launchpad-Export-Date: 2012-02-02 05:57+0000\n"
|
||||
"X-Generator: Launchpad (build 14738)\n"
|
||||
"Language: nl\n"
|
||||
|
||||
#. name for aaa
|
||||
@ -17956,7 +17956,7 @@ msgstr ""
|
||||
|
||||
#. name for nds
|
||||
msgid "German; Low"
|
||||
msgstr ""
|
||||
msgstr "Duits; Laag"
|
||||
|
||||
#. name for ndt
|
||||
msgid "Ndunga"
|
||||
@ -30424,7 +30424,7 @@ msgstr ""
|
||||
|
||||
#. name for zlm
|
||||
msgid "Malay (individual language)"
|
||||
msgstr ""
|
||||
msgstr "Maleis (aparte taal)"
|
||||
|
||||
#. name for zln
|
||||
msgid "Zhuang; Lianshan"
|
||||
|
@ -151,7 +151,7 @@ class Translations(POT): # {{{
|
||||
self.info('\tCopying ISO 639 translations')
|
||||
subprocess.check_call(['msgfmt', '-o', dest, iso639])
|
||||
elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
|
||||
'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml'):
|
||||
'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku'):
|
||||
self.warn('No ISO 639 translations for locale:', locale)
|
||||
|
||||
self.write_stats()
|
||||
|
@ -5,7 +5,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, subprocess, hashlib, shutil, glob, stat, sys, time
|
||||
import os, subprocess, hashlib, shutil, glob, stat, sys, time
|
||||
from subprocess import check_call
|
||||
from tempfile import NamedTemporaryFile, mkdtemp
|
||||
from zipfile import ZipFile
|
||||
@ -64,15 +64,11 @@ class ReUpload(Command): # {{{
|
||||
|
||||
# Data {{{
|
||||
def get_google_data():
|
||||
PASSWORD_FILE = os.path.expanduser('~/.googlecodecalibre')
|
||||
OFFLINEIMAP = os.path.expanduser('~/work/kde/conf/offlineimap/rc')
|
||||
with open(os.path.expanduser('~/work/kde/conf/googlecodecalibre'), 'rb') as f:
|
||||
gc_password, ga_un, pw = f.read().strip().split('|')
|
||||
|
||||
gc_password = open(PASSWORD_FILE).read().strip()
|
||||
raw = open(OFFLINEIMAP).read()
|
||||
pw = re.search(r'(?s)remoteuser = .*@gmail.com.*?remotepass = (\S+)',
|
||||
raw).group(1).strip()
|
||||
return {
|
||||
'username':'kovidgoyal@gmail.com', 'password':pw, 'gc_password':gc_password,
|
||||
'username':ga_un, 'password':pw, 'gc_password':gc_password,
|
||||
'path_map_server':'root@kovidgoyal.net',
|
||||
'path_map_location':'/var/www/status.calibre-ebook.com/googlepaths',
|
||||
# If you change this remember to change it in the
|
||||
|
@ -350,20 +350,20 @@ def get_proxy_info(proxy_scheme, proxy_string):
|
||||
USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13'
|
||||
USER_AGENT_MOBILE = 'Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016'
|
||||
|
||||
def random_user_agent():
|
||||
def random_user_agent(choose=None):
|
||||
choices = [
|
||||
'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
|
||||
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
|
||||
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
|
||||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
|
||||
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
|
||||
'Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0',
|
||||
'Mozilla/5.0 (Windows NT 6.2; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
|
||||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3',
|
||||
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5',
|
||||
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
|
||||
]
|
||||
#return choices[-1]
|
||||
return choices[random.randint(0, len(choices)-1)]
|
||||
if choose is None:
|
||||
choose = random.randint(0, len(choices)-1)
|
||||
return choices[choose]
|
||||
|
||||
def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||
'''
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (0, 8, 37)
|
||||
numeric_version = (0, 8, 40)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
|
@ -449,7 +449,7 @@ class CatalogPlugin(Plugin): # {{{
|
||||
['author_sort','authors','comments','cover','formats',
|
||||
'id','isbn','ondevice','pubdate','publisher','rating',
|
||||
'series_index','series','size','tags','timestamp',
|
||||
'title_sort','title','uuid','languages'])
|
||||
'title_sort','title','uuid','languages','identifiers'])
|
||||
all_custom_fields = set(db.custom_field_keys())
|
||||
for field in list(all_custom_fields):
|
||||
fm = db.field_metadata[field]
|
||||
|
@ -5,13 +5,14 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, glob, functools, re
|
||||
from calibre import guess_type
|
||||
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
|
||||
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase
|
||||
from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
|
||||
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase)
|
||||
from calibre.constants import numeric_version
|
||||
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.ebooks.html.to_zip import HTML2ZIP
|
||||
|
||||
plugins = []
|
||||
|
||||
# To archive plugins {{{
|
||||
|
||||
class PML2PMLZ(FileTypePlugin):
|
||||
@ -86,6 +87,8 @@ class TXT2TXTZ(FileTypePlugin):
|
||||
return list(set(images))
|
||||
|
||||
def run(self, path_to_ebook):
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
|
||||
with open(path_to_ebook, 'rb') as ebf:
|
||||
txt = ebf.read()
|
||||
base_dir = os.path.dirname(path_to_ebook)
|
||||
@ -117,6 +120,7 @@ class TXT2TXTZ(FileTypePlugin):
|
||||
# No images so just import the TXT file.
|
||||
return path_to_ebook
|
||||
|
||||
plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract,]
|
||||
# }}}
|
||||
|
||||
# Metadata reader plugins {{{
|
||||
@ -399,6 +403,10 @@ class ZipMetadataReader(MetadataReaderPlugin):
|
||||
def get_metadata(self, stream, ftype):
|
||||
from calibre.ebooks.metadata.zip import get_metadata
|
||||
return get_metadata(stream)
|
||||
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
|
||||
# }}}
|
||||
|
||||
# Metadata writer plugins {{{
|
||||
@ -499,107 +507,51 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
|
||||
from calibre.ebooks.metadata.extz import set_metadata
|
||||
set_metadata(stream, mi)
|
||||
|
||||
# }}}
|
||||
|
||||
from calibre.ebooks.comic.input import ComicInput
|
||||
from calibre.ebooks.djvu.input import DJVUInput
|
||||
from calibre.ebooks.epub.input import EPUBInput
|
||||
from calibre.ebooks.fb2.input import FB2Input
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.ebooks.htmlz.input import HTMLZInput
|
||||
from calibre.ebooks.lit.input import LITInput
|
||||
from calibre.ebooks.mobi.input import MOBIInput
|
||||
from calibre.ebooks.odt.input import ODTInput
|
||||
from calibre.ebooks.pdb.input import PDBInput
|
||||
from calibre.ebooks.azw4.input import AZW4Input
|
||||
from calibre.ebooks.pdf.input import PDFInput
|
||||
from calibre.ebooks.pml.input import PMLInput
|
||||
from calibre.ebooks.rb.input import RBInput
|
||||
from calibre.web.feeds.input import RecipeInput
|
||||
from calibre.ebooks.rtf.input import RTFInput
|
||||
from calibre.ebooks.tcr.input import TCRInput
|
||||
from calibre.ebooks.txt.input import TXTInput
|
||||
from calibre.ebooks.lrf.input import LRFInput
|
||||
from calibre.ebooks.chm.input import CHMInput
|
||||
from calibre.ebooks.snb.input import SNBInput
|
||||
|
||||
from calibre.ebooks.epub.output import EPUBOutput
|
||||
from calibre.ebooks.fb2.output import FB2Output
|
||||
from calibre.ebooks.lit.output import LITOutput
|
||||
from calibre.ebooks.lrf.output import LRFOutput
|
||||
from calibre.ebooks.mobi.output import MOBIOutput
|
||||
from calibre.ebooks.oeb.output import OEBOutput
|
||||
from calibre.ebooks.pdb.output import PDBOutput
|
||||
from calibre.ebooks.pdf.output import PDFOutput
|
||||
from calibre.ebooks.pml.output import PMLOutput
|
||||
from calibre.ebooks.rb.output import RBOutput
|
||||
from calibre.ebooks.rtf.output import RTFOutput
|
||||
from calibre.ebooks.tcr.output import TCROutput
|
||||
from calibre.ebooks.txt.output import TXTOutput
|
||||
from calibre.ebooks.txt.output import TXTZOutput
|
||||
from calibre.ebooks.html.output import HTMLOutput
|
||||
from calibre.ebooks.htmlz.output import HTMLZOutput
|
||||
from calibre.ebooks.snb.output import SNBOutput
|
||||
|
||||
from calibre.customize.profiles import input_profiles, output_profiles
|
||||
|
||||
from calibre.devices.apple.driver import ITUNES
|
||||
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
||||
from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
|
||||
from calibre.devices.cybook.driver import CYBOOK, ORIZON
|
||||
from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
|
||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
|
||||
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
|
||||
POCKETBOOK701, POCKETBOOK360P, PI2)
|
||||
from calibre.devices.iliad.driver import ILIAD
|
||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
||||
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
||||
KINDLE_FIRE)
|
||||
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||
from calibre.devices.prs505.driver import PRS505
|
||||
from calibre.devices.prst1.driver import PRST1
|
||||
from calibre.devices.user_defined.driver import USER_DEFINED
|
||||
from calibre.devices.android.driver import ANDROID, S60, WEBOS
|
||||
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
||||
from calibre.devices.eslick.driver import ESLICK, EBK52
|
||||
from calibre.devices.nuut2.driver import NUUT2
|
||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||
from calibre.devices.binatone.driver import README
|
||||
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
|
||||
LIBREAIR, ODYSSEY)
|
||||
from calibre.devices.edge.driver import EDGE
|
||||
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
|
||||
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
|
||||
from calibre.devices.sne.driver import SNE
|
||||
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
|
||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
|
||||
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
|
||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||
from calibre.devices.kobo.driver import KOBO
|
||||
from calibre.devices.bambook.driver import BAMBOOK
|
||||
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
|
||||
|
||||
from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
|
||||
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
|
||||
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
|
||||
|
||||
plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
|
||||
Epubcheck, ]
|
||||
|
||||
# New metadata download plugins {{{
|
||||
from calibre.ebooks.metadata.sources.google import GoogleBooks
|
||||
from calibre.ebooks.metadata.sources.amazon import Amazon
|
||||
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
|
||||
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
||||
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
||||
from calibre.ebooks.metadata.sources.douban import Douban
|
||||
from calibre.ebooks.metadata.sources.ozon import Ozon
|
||||
|
||||
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataWriter')]
|
||||
|
||||
# }}}
|
||||
|
||||
# Conversion plugins {{{
|
||||
from calibre.ebooks.conversion.plugins.comic_input import ComicInput
|
||||
from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput
|
||||
from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
|
||||
from calibre.ebooks.conversion.plugins.fb2_input import FB2Input
|
||||
from calibre.ebooks.conversion.plugins.html_input import HTMLInput
|
||||
from calibre.ebooks.conversion.plugins.htmlz_input import HTMLZInput
|
||||
from calibre.ebooks.conversion.plugins.lit_input import LITInput
|
||||
from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput
|
||||
from calibre.ebooks.conversion.plugins.odt_input import ODTInput
|
||||
from calibre.ebooks.conversion.plugins.pdb_input import PDBInput
|
||||
from calibre.ebooks.conversion.plugins.azw4_input import AZW4Input
|
||||
from calibre.ebooks.conversion.plugins.pdf_input import PDFInput
|
||||
from calibre.ebooks.conversion.plugins.pml_input import PMLInput
|
||||
from calibre.ebooks.conversion.plugins.rb_input import RBInput
|
||||
from calibre.ebooks.conversion.plugins.recipe_input import RecipeInput
|
||||
from calibre.ebooks.conversion.plugins.rtf_input import RTFInput
|
||||
from calibre.ebooks.conversion.plugins.tcr_input import TCRInput
|
||||
from calibre.ebooks.conversion.plugins.txt_input import TXTInput
|
||||
from calibre.ebooks.conversion.plugins.lrf_input import LRFInput
|
||||
from calibre.ebooks.conversion.plugins.chm_input import CHMInput
|
||||
from calibre.ebooks.conversion.plugins.snb_input import SNBInput
|
||||
|
||||
from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
|
||||
from calibre.ebooks.conversion.plugins.fb2_output import FB2Output
|
||||
from calibre.ebooks.conversion.plugins.lit_output import LITOutput
|
||||
from calibre.ebooks.conversion.plugins.lrf_output import LRFOutput
|
||||
from calibre.ebooks.conversion.plugins.mobi_output import MOBIOutput
|
||||
from calibre.ebooks.conversion.plugins.oeb_output import OEBOutput
|
||||
from calibre.ebooks.conversion.plugins.pdb_output import PDBOutput
|
||||
from calibre.ebooks.conversion.plugins.pdf_output import PDFOutput
|
||||
from calibre.ebooks.conversion.plugins.pml_output import PMLOutput
|
||||
from calibre.ebooks.conversion.plugins.rb_output import RBOutput
|
||||
from calibre.ebooks.conversion.plugins.rtf_output import RTFOutput
|
||||
from calibre.ebooks.conversion.plugins.tcr_output import TCROutput
|
||||
from calibre.ebooks.conversion.plugins.txt_output import TXTOutput, TXTZOutput
|
||||
from calibre.ebooks.conversion.plugins.html_output import HTMLOutput
|
||||
from calibre.ebooks.conversion.plugins.htmlz_output import HTMLZOutput
|
||||
from calibre.ebooks.conversion.plugins.snb_output import SNBOutput
|
||||
|
||||
plugins += [
|
||||
ComicInput,
|
||||
DJVUInput,
|
||||
@ -642,6 +594,66 @@ plugins += [
|
||||
HTMLZOutput,
|
||||
SNBOutput,
|
||||
]
|
||||
# }}}
|
||||
|
||||
# Catalog plugins {{{
|
||||
from calibre.library.catalogs.csv_xml import CSV_XML
|
||||
from calibre.library.catalogs.bibtex import BIBTEX
|
||||
from calibre.library.catalogs.epub_mobi import EPUB_MOBI
|
||||
plugins += [CSV_XML, BIBTEX, EPUB_MOBI]
|
||||
# }}}
|
||||
|
||||
# EPUB Fix plugins {{{
|
||||
from calibre.ebooks.epub.fix.unmanifested import Unmanifested
|
||||
from calibre.ebooks.epub.fix.epubcheck import Epubcheck
|
||||
plugins += [Unmanifested, Epubcheck]
|
||||
# }}}
|
||||
|
||||
# Profiles {{{
|
||||
from calibre.customize.profiles import input_profiles, output_profiles
|
||||
plugins += input_profiles + output_profiles
|
||||
# }}}
|
||||
|
||||
# Device driver plugins {{{
|
||||
from calibre.devices.apple.driver import ITUNES
|
||||
from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
|
||||
from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
|
||||
from calibre.devices.cybook.driver import CYBOOK, ORIZON
|
||||
from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
|
||||
POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
|
||||
BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
|
||||
POCKETBOOK701, POCKETBOOK360P, PI2)
|
||||
from calibre.devices.iliad.driver import ILIAD
|
||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
||||
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
||||
KINDLE_FIRE)
|
||||
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||
from calibre.devices.prs505.driver import PRS505
|
||||
from calibre.devices.prst1.driver import PRST1
|
||||
from calibre.devices.user_defined.driver import USER_DEFINED
|
||||
from calibre.devices.android.driver import ANDROID, S60, WEBOS
|
||||
from calibre.devices.nokia.driver import N770, N810, E71X, E52
|
||||
from calibre.devices.eslick.driver import ESLICK, EBK52
|
||||
from calibre.devices.nuut2.driver import NUUT2
|
||||
from calibre.devices.iriver.driver import IRIVER_STORY
|
||||
from calibre.devices.binatone.driver import README
|
||||
from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
|
||||
LIBREAIR, ODYSSEY)
|
||||
from calibre.devices.edge.driver import EDGE
|
||||
from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
|
||||
SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
|
||||
from calibre.devices.sne.driver import SNE
|
||||
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
|
||||
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
|
||||
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
|
||||
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
|
||||
from calibre.devices.kobo.driver import KOBO
|
||||
from calibre.devices.bambook.driver import BAMBOOK
|
||||
from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
|
||||
|
||||
|
||||
|
||||
# Order here matters. The first matched device is the one used.
|
||||
plugins += [
|
||||
HANLINV3,
|
||||
@ -716,11 +728,20 @@ plugins += [
|
||||
BOEYE_BDX,
|
||||
USER_DEFINED,
|
||||
]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataWriter')]
|
||||
plugins += input_profiles + output_profiles
|
||||
# }}}
|
||||
|
||||
# New metadata download plugins {{{
|
||||
from calibre.ebooks.metadata.sources.google import GoogleBooks
|
||||
from calibre.ebooks.metadata.sources.amazon import Amazon
|
||||
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
|
||||
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
||||
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
||||
from calibre.ebooks.metadata.sources.douban import Douban
|
||||
from calibre.ebooks.metadata.sources.ozon import Ozon
|
||||
|
||||
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
|
||||
|
||||
# }}}
|
||||
|
||||
# Interface Actions {{{
|
||||
|
||||
@ -1623,3 +1644,34 @@ plugins += [
|
||||
]
|
||||
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
    # Test load speed
    # Spawns a separate interpreter that times the import of
    # calibre.customize.builtins and reports, via its exit status, whether
    # any module from the watch list below ended up in sys.modules.
    import subprocess, textwrap
    try:
        # textwrap.dedent removes the common leading whitespace so that the
        # indented literal below becomes valid top-level code for `python -c`.
        subprocess.check_call(['python', '-c', textwrap.dedent(
        '''
        from __future__ import print_function
        import time, sys, init_calibre
        st = time.time()
        import calibre.customize.builtins
        t = time.time() - st
        ret = 0

        for x in ('lxml', 'calibre.ebooks.BeautifulSoup', 'uuid',
            'calibre.utils.terminfo', 'calibre.utils.magick', 'PIL', 'Image',
            'sqlite3', 'mechanize', 'httplib', 'xml'):
            if x in sys.modules:
                ret = 1
                print (x, 'has been loaded by a plugin')
        if ret:
            print ('\\nA good way to track down what is loading something is to run'
            ' python -c "import init_calibre; import calibre.customize.builtins"')
            print()
        print ('Time taken to import all plugins: %.2f'%t)
        sys.exit(ret)

        ''')])
    except subprocess.CalledProcessError:
        # check_call raises when the child exits non-zero (ret == 1 above);
        # propagate that as this process's own failure status.
        raise SystemExit(1)
|
||||
|
||||
|
@ -5,7 +5,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from itertools import izip
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from calibre.customize import Plugin as _Plugin
|
||||
|
||||
@ -268,6 +267,7 @@ class OutputProfile(Plugin):
|
||||
|
||||
@classmethod
def tags_to_string(cls, tags):
    '''
    Return the tags joined with ', ' as a single XML-escaped string.

    :param tags: iterable of tag strings
    '''
    # Function-level import: xml.sax is only loaded when this is called,
    # not when the module itself is imported.
    from xml.sax.saxutils import escape
    joined = ', '.join(tags)
    return escape(joined)
|
||||
|
||||
class iPadOutput(OutputProfile):
|
||||
|
@ -447,11 +447,14 @@ def plugin_for_catalog_format(fmt):
|
||||
|
||||
# }}}
|
||||
|
||||
def device_plugins(): # {{{
|
||||
def device_plugins(include_disabled=False): # {{{
|
||||
for plugin in _initialized_plugins:
|
||||
if isinstance(plugin, DevicePlugin):
|
||||
if not is_disabled(plugin):
|
||||
if include_disabled or not is_disabled(plugin):
|
||||
if platform in plugin.supported_platforms:
|
||||
if getattr(plugin, 'plugin_needs_delayed_initialization',
|
||||
False):
|
||||
plugin.do_delayed_plugin_initialization()
|
||||
yield plugin
|
||||
# }}}
|
||||
|
||||
@ -496,7 +499,7 @@ def initialize_plugin(plugin, path_to_zip_file):
|
||||
def has_external_plugins():
|
||||
return bool(config['plugins'])
|
||||
|
||||
def initialize_plugins():
|
||||
def initialize_plugins(perf=False):
|
||||
global _initialized_plugins
|
||||
_initialized_plugins = []
|
||||
conflicts = [name for name in config['plugins'] if name in
|
||||
@ -504,6 +507,11 @@ def initialize_plugins():
|
||||
for p in conflicts:
|
||||
remove_plugin(p)
|
||||
external_plugins = config['plugins']
|
||||
ostdout, ostderr = sys.stdout, sys.stderr
|
||||
if perf:
|
||||
from collections import defaultdict
|
||||
import time
|
||||
times = defaultdict(lambda:0)
|
||||
for zfp in list(external_plugins) + builtin_plugins:
|
||||
try:
|
||||
if not isinstance(zfp, type):
|
||||
@ -516,12 +524,22 @@ def initialize_plugins():
|
||||
plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
|
||||
except PluginNotFound:
|
||||
continue
|
||||
if perf:
|
||||
st = time.time()
|
||||
plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
|
||||
if perf:
|
||||
times[plugin.name] = time.time() - st
|
||||
_initialized_plugins.append(plugin)
|
||||
except:
|
||||
print 'Failed to initialize plugin:', repr(zfp)
|
||||
if DEBUG:
|
||||
traceback.print_exc()
|
||||
# Prevent a custom plugin from overriding stdout/stderr as this breaks
|
||||
# ipython
|
||||
sys.stdout, sys.stderr = ostdout, ostderr
|
||||
if perf:
|
||||
for x in sorted(times, key=lambda x:times[x]):
|
||||
print ('%50s: %.3f'%(x, times[x]))
|
||||
_initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
|
||||
reread_filetype_plugins()
|
||||
reread_metadata_plugins()
|
||||
|
@ -38,6 +38,7 @@ class ANDROID(USBMS):
|
||||
0xca4 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xca9 : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xcac : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0xccf : [0x100, 0x0227, 0x0226, 0x222],
|
||||
0x2910 : [0x222],
|
||||
},
|
||||
|
||||
@ -52,6 +53,7 @@ class ANDROID(USBMS):
|
||||
0x70c6 : [0x226],
|
||||
0x4316 : [0x216],
|
||||
0x42d6 : [0x216],
|
||||
0x42d7 : [0x216],
|
||||
},
|
||||
# Freescale
|
||||
0x15a2 : {
|
||||
@ -68,14 +70,17 @@ class ANDROID(USBMS):
|
||||
0xd12e : [0x0100],
|
||||
0xe14f : [0x0226],
|
||||
0x614f : [0x0226, 0x100],
|
||||
0x6156 : [0x0226, 0x100],
|
||||
},
|
||||
|
||||
# Google
|
||||
0x18d1 : {
|
||||
0x0001 : [0x0223, 0x9999],
|
||||
0x0003 : [0x0230],
|
||||
0x4e11 : [0x0100, 0x226, 0x227],
|
||||
0x4e12 : [0x0100, 0x226, 0x227],
|
||||
0x4e21 : [0x0100, 0x226, 0x227, 0x231],
|
||||
0x4e22 : [0x0100, 0x226, 0x227],
|
||||
0xb058 : [0x0222, 0x226, 0x227],
|
||||
0x0ff9 : [0x0226],
|
||||
},
|
||||
@ -99,6 +104,7 @@ class ANDROID(USBMS):
|
||||
0xc001 : [0x0226],
|
||||
0xc004 : [0x0226],
|
||||
0x8801 : [0x0226, 0x0227],
|
||||
0xe115 : [0x0216], # PocketBook A10
|
||||
},
|
||||
|
||||
# Acer
|
||||
@ -163,7 +169,8 @@ class ANDROID(USBMS):
|
||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
|
||||
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP']
|
||||
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
|
||||
'POCKET', 'ONDA_MID']
|
||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
@ -176,13 +183,15 @@ class ANDROID(USBMS):
|
||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
|
||||
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
|
||||
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855']
|
||||
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
|
||||
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
|
||||
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
|
||||
'A1-07___C0541A4F', 'XT912', 'MB855']
|
||||
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
|
||||
'USB_2.0_DRIVER']
|
||||
|
||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||
|
||||
@ -221,6 +230,20 @@ class ANDROID(USBMS):
|
||||
drives['main'] = letter_a
|
||||
return drives
|
||||
|
||||
@classmethod
def configure_for_kindle_app(cls):
    '''
    Write the settings used for the Kindle app into this driver's config
    proxy: a MOBI/AZW/PDF format list, no sub-directories, and 'kindle'
    placed in front of the directories in EBOOK_DIR_MAIN.
    '''
    conf = cls._configProxy()
    conf['format_map'] = ['mobi', 'azw', 'azw1', 'azw4', 'pdf']
    conf['use_subdirs'] = False
    # extra_customization is stored as one comma separated string
    send_dirs = ['kindle'] + cls.EBOOK_DIR_MAIN
    conf['extra_customization'] = ','.join(send_dirs)
|
||||
|
||||
@classmethod
def configure_for_generic_epub_app(cls):
    '''
    Delete the 'format_map', 'use_subdirs' and 'extra_customization'
    entries from this driver's config proxy, i.e. the same keys that
    configure_for_kindle_app() sets.
    '''
    # NOTE(review): presumably deleting a key makes the proxy fall back to
    # the driver default -- confirm against _configProxy().
    conf = cls._configProxy()
    for key in ('format_map', 'use_subdirs', 'extra_customization'):
        del conf[key]
|
||||
|
||||
class S60(USBMS):
|
||||
|
||||
name = 'S60 driver'
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
Sanda library wrapper
|
||||
'''
|
||||
|
||||
import ctypes, uuid, hashlib, os, sys
|
||||
import ctypes, hashlib, os, sys
|
||||
from threading import Event, Lock
|
||||
from calibre.constants import iswindows
|
||||
from calibre import load_library
|
||||
@ -350,6 +350,7 @@ class Bambook:
|
||||
return None
|
||||
|
||||
def SendFile(self, fileName, guid = None):
|
||||
import uuid
|
||||
if self.handle:
|
||||
taskID = job.NewJob()
|
||||
if guid:
|
||||
|
File diff suppressed because one or more lines are too long
@ -97,3 +97,13 @@ class FOLDER_DEVICE(USBMS):
|
||||
@classmethod
def settings(self):
    '''
    Return the result of parsing the config object of
    FOLDER_DEVICE_FOR_CONFIG; this class keeps no settings of its own here.
    '''
    return FOLDER_DEVICE_FOR_CONFIG._config().parse()
|
||||
|
||||
@classmethod
def config_widget(cls):
    '''
    Return the configuration widget created by FOLDER_DEVICE_FOR_CONFIG.
    '''
    return FOLDER_DEVICE_FOR_CONFIG.config_widget()
|
||||
|
||||
@classmethod
def save_settings(cls, config_widget):
    '''
    Hand config_widget to FOLDER_DEVICE_FOR_CONFIG.save_settings() and
    return whatever it returns.
    '''
    return FOLDER_DEVICE_FOR_CONFIG.save_settings(config_widget)
|
||||
|
||||
|
||||
|
@ -9,7 +9,6 @@ Generates and writes an APNX page mapping file.
|
||||
'''
|
||||
|
||||
import struct
|
||||
import uuid
|
||||
|
||||
from calibre.ebooks.mobi.reader import MobiReader
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
@ -20,7 +19,12 @@ class APNXBuilder(object):
|
||||
Create an APNX file using a pseudo page mapping.
|
||||
'''
|
||||
|
||||
def write_apnx(self, mobi_file_path, apnx_path, accurate=True):
|
||||
def write_apnx(self, mobi_file_path, apnx_path, accurate=True, page_count=0):
|
||||
'''
|
||||
If you want a fixed number of pages (such as from a custom column) then
|
||||
pass in a value to page_count, otherwise a count will be estimated
|
||||
using either the fast or accurate algorithm.
|
||||
'''
|
||||
# Check that this is really a MOBI file.
|
||||
with open(mobi_file_path, 'rb') as mf:
|
||||
ident = PdbHeaderReader(mf).identity()
|
||||
@ -29,16 +33,19 @@ class APNXBuilder(object):
|
||||
|
||||
# Get the pages depending on the chosen parser
|
||||
pages = []
|
||||
if accurate:
|
||||
try:
|
||||
pages = self.get_pages_accurate(mobi_file_path)
|
||||
except:
|
||||
# Fall back to the fast parser if we can't
|
||||
# use the accurate one. Typically this is
|
||||
# due to the file having DRM.
|
||||
pages = self.get_pages_fast(mobi_file_path)
|
||||
if page_count:
|
||||
pages = self.get_pages_exact(mobi_file_path, page_count)
|
||||
else:
|
||||
pages = self.get_pages_fast(mobi_file_path)
|
||||
if accurate:
|
||||
try:
|
||||
pages = self.get_pages_accurate(mobi_file_path)
|
||||
except:
|
||||
# Fall back to the fast parser if we can't
|
||||
# use the accurate one. Typically this is
|
||||
# due to the file having DRM.
|
||||
pages = self.get_pages_fast(mobi_file_path)
|
||||
else:
|
||||
pages = self.get_pages_fast(mobi_file_path)
|
||||
|
||||
if not pages:
|
||||
raise Exception(_('Could not generate page mapping.'))
|
||||
@ -51,6 +58,7 @@ class APNXBuilder(object):
|
||||
apnxf.write(apnx)
|
||||
|
||||
def generate_apnx(self, pages):
|
||||
import uuid
|
||||
apnx = ''
|
||||
|
||||
content_vals = {
|
||||
@ -77,6 +85,31 @@ class APNXBuilder(object):
|
||||
|
||||
return apnx
|
||||
|
||||
def get_pages_exact(self, mobi_file_path, page_count):
    '''
    Given a specified page count (such as from a custom column),
    create our array of pages for the apnx file by dividing by
    the content size of the book.

    :param mobi_file_path: path of the file to read the length from
    :param page_count: number of pages wanted
    :return: list of at most page_count evenly spaced offsets starting at
             0. The list is empty when page_count is not a positive
             number, and shorter than page_count when the book has fewer
             characters than requested pages.
    '''
    if not page_count or page_count < 1:
        # Nothing sensible can be generated. An empty list lets the caller
        # report the failure instead of dividing by zero or looping forever.
        return []

    with open(mobi_file_path, 'rb') as mf:
        phead = PdbHeaderReader(mf)
        r0 = phead.section_data(0)
        # Big-endian unsigned int stored at bytes 4-8 of record 0;
        # presumably the length of the book's text (the "content size").
        text_length = struct.unpack('>I', r0[4:8])[0]

    # Never step by less than one character: a step of 0 (more pages
    # requested than there are characters) would never reach text_length.
    chars_per_page = max(1, int(text_length // page_count))

    # Rounding down can create extra page entries, so trim to page_count.
    return list(range(0, text_length, chars_per_page))[:page_count]
|
||||
|
||||
def get_pages_fast(self, mobi_file_path):
|
||||
'''
|
||||
2300 characters of uncompressed text per page. This is
|
||||
|
@ -10,10 +10,8 @@ Device driver for Amazon's Kindle
|
||||
|
||||
import datetime, os, re, sys, json, hashlib
|
||||
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
from calibre.devices.kindle.bookmark import Bookmark
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre import strftime
|
||||
|
||||
'''
|
||||
@ -152,6 +150,7 @@ class KINDLE(USBMS):
|
||||
path_map, book_ext = resolve_bookmark_paths(storage, path_map)
|
||||
|
||||
bookmarked_books = {}
|
||||
|
||||
for id in path_map:
|
||||
bookmark_ext = path_map[id].rpartition('.')[2]
|
||||
myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
|
||||
@ -236,6 +235,8 @@ class KINDLE(USBMS):
|
||||
|
||||
def add_annotation_to_library(self, db, db_id, annotation):
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
bm = annotation
|
||||
ignore_tags = set(['Catalog', 'Clippings'])
|
||||
|
||||
@ -301,19 +302,28 @@ class KINDLE2(KINDLE):
|
||||
' this information to the Kindle when uploading MOBI files by'
|
||||
' USB. Note that the page numbers do not correspond to any paper'
|
||||
' book.'),
|
||||
_('Use slower but more accurate page number generation') +
|
||||
_('Use slower but more accurate page number calculation') +
|
||||
':::' +
|
||||
_('There are two ways to generate the page number information. Using the more accurate '
|
||||
'generator will produce pages that correspond better to a printed book. '
|
||||
'However, this method is slower and will slow down sending files '
|
||||
'to the Kindle.'),
|
||||
_('Custom column name to retrieve page counts from') +
|
||||
':::' +
|
||||
_('If you have a custom column in your library that you use to '
|
||||
'store the page count of books, you can have calibre use that '
|
||||
'information, instead of calculating a page count. Specify the '
|
||||
'name of the custom column here, for example, #pages. '),
|
||||
|
||||
]
|
||||
EXTRA_CUSTOMIZATION_DEFAULT = [
|
||||
True,
|
||||
False,
|
||||
'',
|
||||
]
|
||||
OPT_APNX = 0
|
||||
OPT_APNX_ACCURATE = 1
|
||||
OPT_APNX_CUST_COL = 2
|
||||
|
||||
def books(self, oncard=None, end_session=True):
|
||||
bl = USBMS.books(self, oncard=oncard, end_session=end_session)
|
||||
@ -363,6 +373,8 @@ class KINDLE2(KINDLE):
|
||||
'''
|
||||
Hijacking this function to write the apnx file.
|
||||
'''
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
|
||||
opts = self.settings()
|
||||
if not opts.extra_customization[self.OPT_APNX]:
|
||||
return
|
||||
@ -377,10 +389,20 @@ class KINDLE2(KINDLE):
|
||||
if not os.path.exists(path):
|
||||
os.makedirs(path)
|
||||
|
||||
cust_col_name = opts.extra_customization[self.OPT_APNX_CUST_COL]
|
||||
custom_page_count = 0
|
||||
if cust_col_name:
|
||||
try:
|
||||
custom_page_count = int(metadata.get(cust_col_name, 0))
|
||||
except:
|
||||
pass
|
||||
|
||||
apnx_path = '%s.apnx' % os.path.join(path, filename)
|
||||
apnx_builder = APNXBuilder()
|
||||
try:
|
||||
apnx_builder.write_apnx(filepath, apnx_path, accurate=opts.extra_customization[self.OPT_APNX_ACCURATE])
|
||||
apnx_builder.write_apnx(filepath, apnx_path,
|
||||
accurate=opts.extra_customization[self.OPT_APNX_ACCURATE],
|
||||
page_count=custom_page_count)
|
||||
except:
|
||||
print 'Failed to generate APNX'
|
||||
import traceback
|
||||
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
from contextlib import closing
|
||||
|
||||
import sqlite3 as sqlite
|
||||
|
||||
class Bookmark(): # {{{
|
||||
'''
|
||||
@ -32,7 +31,7 @@ class Bookmark(): # {{{
|
||||
|
||||
def get_bookmark_data(self):
|
||||
''' Return the timestamp and last_read_location '''
|
||||
|
||||
import sqlite3 as sqlite
|
||||
user_notes = {}
|
||||
self.timestamp = os.path.getmtime(self.path)
|
||||
with closing(sqlite.connect(self.db_path)) as connection:
|
||||
|
@ -6,7 +6,6 @@ __copyright__ = '2010, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, time, calendar
|
||||
import sqlite3 as sqlite
|
||||
from contextlib import closing
|
||||
from calibre.devices.usbms.books import BookList
|
||||
from calibre.devices.kobo.books import Book
|
||||
@ -16,7 +15,6 @@ from calibre.devices.mime import mime_type_ext
|
||||
from calibre.devices.usbms.driver import USBMS, debug_print
|
||||
from calibre import prints
|
||||
from calibre.devices.usbms.books import CollectionsBookList
|
||||
from calibre.utils.magick.draw import save_cover_data_to
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class KOBO(USBMS):
|
||||
@ -230,6 +228,7 @@ class KOBO(USBMS):
|
||||
traceback.print_exc()
|
||||
return changed
|
||||
|
||||
import sqlite3 as sqlite
|
||||
with closing(sqlite.connect(
|
||||
self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
@ -344,6 +343,7 @@ class KOBO(USBMS):
|
||||
# 2) volume_shorcover
|
||||
# 2) content
|
||||
|
||||
import sqlite3 as sqlite
|
||||
debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
|
||||
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
@ -739,6 +739,8 @@ class KOBO(USBMS):
|
||||
# Needs to be outside books collection as in the case of removing
|
||||
# the last book from the collection the list of books is empty
|
||||
# and the removal of the last book would not occur
|
||||
|
||||
import sqlite3 as sqlite
|
||||
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
|
||||
@ -850,6 +852,7 @@ class KOBO(USBMS):
|
||||
debug_print('FAILED to upload cover', filepath)
|
||||
|
||||
def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale):
|
||||
from calibre.utils.magick.draw import save_cover_data_to
|
||||
if metadata.cover:
|
||||
cover = self.normalize_path(metadata.cover.replace('/', os.sep))
|
||||
|
||||
@ -859,6 +862,7 @@ class KOBO(USBMS):
|
||||
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath)
|
||||
ContentID = self.contentid_from_path(filepath, ContentType)
|
||||
|
||||
import sqlite3 as sqlite
|
||||
with closing(sqlite.connect(self.normalize_path(self._main_prefix +
|
||||
'.kobo/KoboReader.sqlite'))) as connection:
|
||||
|
||||
|
@ -76,7 +76,7 @@ class E52(USBMS):
|
||||
supported_platforms = ['windows', 'linux', 'osx']
|
||||
|
||||
VENDOR_ID = [0x421]
|
||||
PRODUCT_ID = [0x1CD, 0x273]
|
||||
PRODUCT_ID = [0x1CD, 0x273, 0x00aa]
|
||||
BCD = [0x100]
|
||||
|
||||
|
||||
@ -86,5 +86,5 @@ class E52(USBMS):
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
||||
VENDOR_NAME = 'NOKIA'
|
||||
WINDOWS_MAIN_MEM = 'S60'
|
||||
WINDOWS_MAIN_MEM = ['S60', 'E71']
|
||||
|
||||
|
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, time
|
||||
from base64 import b64decode
|
||||
from uuid import uuid4
|
||||
from lxml import etree
|
||||
from datetime import date
|
||||
|
||||
from calibre import prints, guess_type, isbytestring
|
||||
@ -78,6 +76,7 @@ def strftime(epoch, zone=time.localtime):
|
||||
return ' '.join(src)
|
||||
|
||||
def uuid():
    '''
    Return a new random UUID as an upper-case string with only its first
    hyphen removed.
    '''
    from uuid import uuid4
    text = str(uuid4())
    # Splitting at the first '-' and re-joining drops exactly that hyphen
    head, _dash, tail = text.partition('-')
    return (head + tail).upper()
|
||||
|
||||
# }}}
|
||||
@ -85,6 +84,8 @@ def uuid():
|
||||
class XMLCache(object):
|
||||
|
||||
def __init__(self, paths, ext_paths, prefixes, use_author_sort):
|
||||
from lxml import etree
|
||||
|
||||
if DEBUG:
|
||||
debug_print('Building XMLCache...', paths)
|
||||
self.paths = paths
|
||||
@ -714,6 +715,8 @@ class XMLCache(object):
|
||||
|
||||
|
||||
def write(self):
|
||||
from lxml import etree
|
||||
|
||||
for i, path in self.paths.items():
|
||||
self.move_playlists_to_bottom()
|
||||
self.cleanup_whitespace(i)
|
||||
|
@ -12,8 +12,6 @@ Device driver for the SONY T1 devices
|
||||
'''
|
||||
|
||||
import os, time, re
|
||||
import sqlite3 as sqlite
|
||||
from sqlite3 import DatabaseError
|
||||
from contextlib import closing
|
||||
from datetime import date
|
||||
|
||||
@ -146,6 +144,8 @@ class PRST1(USBMS):
|
||||
return True
|
||||
|
||||
def books(self, oncard=None, end_session=True):
|
||||
import sqlite3 as sqlite
|
||||
|
||||
dummy_bl = BookList(None, None, None)
|
||||
|
||||
if (
|
||||
@ -246,6 +246,8 @@ class PRST1(USBMS):
|
||||
debug_print('PRST1: finished sync_booklists')
|
||||
|
||||
def update_device_database(self, booklist, collections_attributes, oncard):
|
||||
import sqlite3 as sqlite
|
||||
|
||||
debug_print('PRST1: starting update_device_database')
|
||||
|
||||
plugboard = None
|
||||
@ -274,6 +276,8 @@ class PRST1(USBMS):
|
||||
|
||||
def update_device_books(self, connection, booklist, source_id, plugboard,
|
||||
dbpath):
|
||||
from sqlite3 import DatabaseError
|
||||
|
||||
opts = self.settings()
|
||||
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
|
||||
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
|
||||
@ -489,6 +493,8 @@ class PRST1(USBMS):
|
||||
debug_print('PRS-T1: finished rebuild_collections')
|
||||
|
||||
def upload_cover(self, path, filename, metadata, filepath):
|
||||
import sqlite3 as sqlite
|
||||
|
||||
debug_print('PRS-T1: uploading cover')
|
||||
|
||||
if filepath.startswith(self._main_prefix):
|
||||
|
@ -8,7 +8,7 @@ manner.
|
||||
import sys, os, re
|
||||
from threading import RLock
|
||||
|
||||
from calibre.constants import iswindows, isosx, plugins, islinux
|
||||
from calibre.constants import iswindows, isosx, plugins, islinux, isfreebsd
|
||||
|
||||
osx_scanner = win_scanner = linux_scanner = None
|
||||
|
||||
@ -155,17 +155,78 @@ class LinuxScanner(object):
|
||||
ans.add(tuple(dev))
|
||||
return ans
|
||||
|
||||
class FreeBSDScanner(object):
    '''
    Callable that asks HAL (over the D-Bus system bus) for devices driven by
    the FreeBSD 'da' driver and reports those that have an ancestor driven
    by 'umass'.
    '''

    @staticmethod
    def _optional_property(objif, name):
        # Best-effort lookup of a descriptive property: '' when it cannot be
        # read. Catches Exception rather than everything so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            return objif.GetProperty(name)
        except Exception:
            return ''

    def __call__(self):
        '''
        Return a set of 7-tuples, one per matching device:
        (usb.vendor_id, usb.product_id, usb.device_revision_bcd,
        info.vendor, info.product, usb.serial, HAL path of the 'da' device).
        The three info/serial entries are '' when unavailable. D-Bus errors
        while scanning are printed to stderr and whatever was collected so
        far is returned.
        '''
        ans = set()
        import dbus

        try:
            bus = dbus.SystemBus()
            manager = dbus.Interface(bus.get_object('org.freedesktop.Hal',
                          '/org/freedesktop/Hal/Manager'), 'org.freedesktop.Hal.Manager')
            paths = manager.FindDeviceStringMatch('freebsd.driver','da')
            for path in paths:
                obj = bus.get_object('org.freedesktop.Hal', path)
                objif = dbus.Interface(obj, 'org.freedesktop.Hal.Device')
                parentdriver = None
                # Walk up the info.parent chain until an ancestor driven by
                # 'umass' is found or there is no further parent.
                while parentdriver != 'umass':
                    try:
                        obj = bus.get_object('org.freedesktop.Hal',
                              objif.GetProperty('info.parent'))
                        objif = dbus.Interface(obj, 'org.freedesktop.Hal.Device')
                        try:
                            parentdriver = objif.GetProperty('freebsd.driver')
                        except dbus.exceptions.DBusException:
                            # This ancestor has no driver property, keep climbing
                            continue
                    except dbus.exceptions.DBusException:
                        # No parent left to inspect
                        break
                if parentdriver != 'umass':
                    continue
                # From here on objif refers to the 'umass' ancestor
                dev = []
                try:
                    dev.append(objif.GetProperty('usb.vendor_id'))
                    dev.append(objif.GetProperty('usb.product_id'))
                    dev.append(objif.GetProperty('usb.device_revision_bcd'))
                except dbus.exceptions.DBusException:
                    # The numeric ids are mandatory, skip devices without them
                    continue
                for name in ('info.vendor', 'info.product', 'usb.serial'):
                    dev.append(self._optional_property(objif, name))
                dev.append(path)
                ans.add(tuple(dev))
        except dbus.exceptions.DBusException as e:
            print >>sys.stderr, "Execution failed:", e
        return ans
|
||||
|
||||
|
||||
|
||||
linux_scanner = None
|
||||
|
||||
if islinux:
|
||||
linux_scanner = LinuxScanner()
|
||||
|
||||
freebsd_scanner = None
|
||||
|
||||
if isfreebsd:
|
||||
freebsd_scanner = FreeBSDScanner()
|
||||
|
||||
|
||||
class DeviceScanner(object):
|
||||
|
||||
def __init__(self, *args):
|
||||
if isosx and osx_scanner is None:
|
||||
raise RuntimeError('The Python extension usbobserver must be available on OS X.')
|
||||
self.scanner = win_scanner if iswindows else osx_scanner if isosx else linux_scanner
|
||||
self.scanner = win_scanner if iswindows else osx_scanner if isosx else freebsd_scanner if isfreebsd else linux_scanner
|
||||
self.devices = []
|
||||
|
||||
def scan(self):
|
||||
|
@ -677,19 +677,21 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
self._card_a_prefix = self._card_b_prefix
|
||||
self._card_b_prefix = None
|
||||
|
||||
|
||||
# ------------------------------------------------------
|
||||
#
|
||||
# open for FreeBSD
|
||||
# find the device node or nodes that match the S/N we already have from the scanner
|
||||
# and attempt to mount each one
|
||||
# 1. get list of disk devices from sysctl
|
||||
# 2. compare that list with the one from camcontrol
|
||||
# 3. and see if it has a matching s/n
|
||||
# 6. find any partitions/slices associated with each node
|
||||
# 7. attempt to mount, using calibre-mount-helper, each one
|
||||
# 8. when finished, we have a list of mount points and associated device nodes
|
||||
# find the device node or nodes that match the S/N we already have from the scanner
|
||||
# and attempt to mount each one
|
||||
# 1. get list of devices in /dev with matching s/n etc.
|
||||
# 2. get list of volumes associated with each
|
||||
# 3. attempt to mount each one using Hal
|
||||
# 4. when finished, we have a list of mount points and associated dbus nodes
|
||||
#
|
||||
def open_freebsd(self):
|
||||
import dbus
|
||||
# There should be some way to access the -v arg...
|
||||
verbose = False
|
||||
|
||||
# this gives us access to the S/N, etc. of the reader that the scanner has found
|
||||
# and the match routines for some of that data, like s/n, vendor ID, etc.
|
||||
@ -699,128 +701,146 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
raise DeviceError("Device has no S/N. Can't continue")
|
||||
return False
|
||||
|
||||
devs={}
|
||||
di=0
|
||||
ndevs=4 # number of possible devices per reader (main, carda, cardb, launcher)
|
||||
vols=[]
|
||||
|
||||
#get list of disk devices
|
||||
p=subprocess.Popen(["sysctl", "kern.disks"], stdout=subprocess.PIPE)
|
||||
kdsks=subprocess.Popen(["sed", "s/kern.disks: //"], stdin=p.stdout, stdout=subprocess.PIPE).communicate()[0]
|
||||
p.stdout.close()
|
||||
#print kdsks
|
||||
for dvc in kdsks.split():
|
||||
# for each one that's also in the list of cam devices ...
|
||||
p=subprocess.Popen(["camcontrol", "devlist"], stdout=subprocess.PIPE)
|
||||
devmatch=subprocess.Popen(["grep", dvc], stdin=p.stdout, stdout=subprocess.PIPE).communicate()[0]
|
||||
p.stdout.close()
|
||||
if devmatch:
|
||||
#print "Checking ", devmatch
|
||||
# ... see if we can get a S/N from the actual device node
|
||||
sn=subprocess.Popen(["camcontrol", "inquiry", dvc, "-S"], stdout=subprocess.PIPE).communicate()[0]
|
||||
sn=sn[0:-1] # drop the trailing newline
|
||||
#print "S/N = ", sn
|
||||
if sn and d.match_serial(sn):
|
||||
# we have a matching s/n, record this device node
|
||||
#print "match found: ", dvc
|
||||
devs[di]=dvc
|
||||
di += 1
|
||||
bus = dbus.SystemBus()
|
||||
manager = dbus.Interface(bus.get_object('org.freedesktop.Hal',
|
||||
'/org/freedesktop/Hal/Manager'), 'org.freedesktop.Hal.Manager')
|
||||
paths = manager.FindDeviceStringMatch('usb.serial',d.serial)
|
||||
for path in paths:
|
||||
objif = dbus.Interface(bus.get_object('org.freedesktop.Hal', path), 'org.freedesktop.Hal.Device')
|
||||
# Extra paranoia...
|
||||
try:
|
||||
if d.idVendor == objif.GetProperty('usb.vendor_id') and \
|
||||
d.idProduct == objif.GetProperty('usb.product_id') and \
|
||||
d.manufacturer == objif.GetProperty('usb.vendor') and \
|
||||
d.product == objif.GetProperty('usb.product') and \
|
||||
d.serial == objif.GetProperty('usb.serial'):
|
||||
dpaths = manager.FindDeviceStringMatch('storage.originating_device', path)
|
||||
for dpath in dpaths:
|
||||
#devif = dbus.Interface(bus.get_object('org.freedesktop.Hal', dpath), 'org.freedesktop.Hal.Device')
|
||||
try:
|
||||
vpaths = manager.FindDeviceStringMatch('block.storage_device', dpath)
|
||||
for vpath in vpaths:
|
||||
try:
|
||||
vdevif = dbus.Interface(bus.get_object('org.freedesktop.Hal', vpath), 'org.freedesktop.Hal.Device')
|
||||
if not vdevif.GetProperty('block.is_volume'):
|
||||
continue
|
||||
if vdevif.GetProperty('volume.fsusage') != 'filesystem':
|
||||
continue
|
||||
volif = dbus.Interface(bus.get_object('org.freedesktop.Hal', vpath), 'org.freedesktop.Hal.Device.Volume')
|
||||
pdevif = dbus.Interface(bus.get_object('org.freedesktop.Hal', vdevif.GetProperty('info.parent')), 'org.freedesktop.Hal.Device')
|
||||
vol = {'node': pdevif.GetProperty('block.device'),
|
||||
'dev': vdevif,
|
||||
'vol': volif,
|
||||
'label': vdevif.GetProperty('volume.label')}
|
||||
vols.append(vol)
|
||||
except dbus.exceptions.DBusException, e:
|
||||
print e
|
||||
continue
|
||||
except dbus.exceptions.DBusException, e:
|
||||
print e
|
||||
continue
|
||||
except dbus.exceptions.DBusException, e:
|
||||
continue
|
||||
|
||||
# sort the list of devices
|
||||
for i in range(1,ndevs+1):
|
||||
for j in reversed(range(1,i)):
|
||||
if devs[j-1] > devs[j]:
|
||||
x=devs[j-1]
|
||||
devs[j-1]=devs[j]
|
||||
devs[j]=x
|
||||
#print devs
|
||||
def ocmp(x,y):
|
||||
if x['node'] < y['node']:
|
||||
return -1
|
||||
if x['node'] > y['node']:
|
||||
return 1
|
||||
return 0
|
||||
|
||||
vols.sort(cmp=ocmp)
|
||||
|
||||
if verbose:
|
||||
print "FBSD: ", vols
|
||||
|
||||
# now we need to see if any of these have slices/partitions
|
||||
mtd=0
|
||||
label="READER" # could use something more unique, like S/N or productID...
|
||||
cmd = '/usr/local/bin/calibre-mount-helper'
|
||||
cmd = [cmd, 'mount']
|
||||
for i in range(0,ndevs):
|
||||
cmd2="ls /dev/"+devs[i]+"*"
|
||||
p=subprocess.Popen(cmd2, shell=True, stdout=subprocess.PIPE)
|
||||
devs[i]=subprocess.Popen(["cut", "-d", "/", "-f" "3"], stdin=p.stdout, stdout=subprocess.PIPE).communicate()[0]
|
||||
p.stdout.close()
|
||||
|
||||
# try all the nodes to see what we can mount
|
||||
for dev in devs[i].split():
|
||||
mp='/media/'+label+'-'+dev
|
||||
mmp = mp
|
||||
if mmp.endswith('/'):
|
||||
mmp = mmp[:-1]
|
||||
#print "trying ", dev, "on", mp
|
||||
for vol in vols:
|
||||
mp = ''
|
||||
if vol['dev'].GetProperty('volume.is_mounted'):
|
||||
mp = vol['dev'].GetProperty('volume.mount_point')
|
||||
else:
|
||||
try:
|
||||
p = subprocess.Popen(cmd + ["/dev/"+dev, mmp])
|
||||
except OSError:
|
||||
raise DeviceError(_('Could not find mount helper: %s.')%cmd[0])
|
||||
while p.poll() is None:
|
||||
time.sleep(0.1)
|
||||
vol['vol'].Mount('Calibre-'+vol['label'],
|
||||
vol['dev'].GetProperty('volume.fstype'), [])
|
||||
loops = 0
|
||||
while not vol['dev'].GetProperty('volume.is_mounted'):
|
||||
time.sleep(1)
|
||||
loops += 1
|
||||
if loops > 100:
|
||||
print "ERROR: Timeout waiting for mount to complete"
|
||||
continue
|
||||
mp = vol['dev'].GetProperty('volume.mount_point')
|
||||
except dbus.exceptions.DBusException, e:
|
||||
print "Failed to mount ", e
|
||||
continue
|
||||
|
||||
if p.returncode == 0:
|
||||
#print " mounted", dev
|
||||
if i == 0:
|
||||
self._main_prefix = mp
|
||||
self._main_dev = "/dev/"+dev
|
||||
#print "main = ", self._main_dev, self._main_prefix
|
||||
if i == 1:
|
||||
self._card_a_prefix = mp
|
||||
self._card_a_dev = "/dev/"+dev
|
||||
#print "card a = ", self._card_a_dev, self._card_a_prefix
|
||||
if i == 2:
|
||||
self._card_b_prefix = mp
|
||||
self._card_b_dev = "/dev/"+dev
|
||||
#print "card b = ", self._card_b_dev, self._card_b_prefix
|
||||
# Mount Point becomes Mount Path
|
||||
mp += '/'
|
||||
|
||||
mtd += 1
|
||||
break
|
||||
if verbose:
|
||||
print "FBSD: mounted", vol['label'], "on", mp
|
||||
if mtd == 0:
|
||||
self._main_prefix = mp
|
||||
self._main_vol = vol['vol']
|
||||
if verbose:
|
||||
print "FBSD: main = ", self._main_prefix
|
||||
if mtd == 1:
|
||||
self._card_a_prefix = mp
|
||||
self._card_a_vol = vol['vol']
|
||||
if verbose:
|
||||
print "FBSD: card a = ", self._card_a_prefix
|
||||
if mtd == 2:
|
||||
self._card_b_prefix = mp
|
||||
self._card_b_vol = vol['vol']
|
||||
if verbose:
|
||||
print "FBSD: card b = ", self._card_b_prefix
|
||||
# Note that mtd is used as a bool... not incrementing is fine.
|
||||
break
|
||||
mtd += 1
|
||||
|
||||
if mtd > 0:
|
||||
return True
|
||||
else :
|
||||
return False
|
||||
raise DeviceError(_('Unable to mount the device'))
|
||||
|
||||
#
|
||||
# ------------------------------------------------------
|
||||
#
|
||||
# this one is pretty simple:
|
||||
# just umount each of the previously
|
||||
# mounted filesystems, using the mount helper
|
||||
# this one is pretty simple:
|
||||
# just umount each of the previously
|
||||
# mounted filesystems, using the stored volume object
|
||||
#
|
||||
def eject_freebsd(self):
|
||||
cmd = '/usr/local/bin/calibre-mount-helper'
|
||||
cmd = [cmd, 'eject']
|
||||
import dbus
|
||||
# There should be some way to access the -v arg...
|
||||
verbose = False
|
||||
|
||||
if self._main_prefix:
|
||||
#print "umount main:", cmd, self._main_dev, self._main_prefix
|
||||
if verbose:
|
||||
print "FBSD: umount main:", self._main_prefix
|
||||
try:
|
||||
p = subprocess.Popen(cmd + [self._main_dev, self._main_prefix])
|
||||
except OSError:
|
||||
raise DeviceError(
|
||||
_('Could not find mount helper: %s.')%cmd[0])
|
||||
while p.poll() is None:
|
||||
time.sleep(0.1)
|
||||
self._main_vol.Unmount([])
|
||||
except dbus.exceptions.DBusException, e:
|
||||
print 'Unable to eject ', e
|
||||
|
||||
if self._card_a_prefix:
|
||||
#print "umount card a:", cmd, self._card_a_dev, self._card_a_prefix
|
||||
if verbose:
|
||||
print "FBSD: umount card a:", self._card_a_prefix
|
||||
try:
|
||||
p = subprocess.Popen(cmd + [self._card_a_dev, self._card_a_prefix])
|
||||
except OSError:
|
||||
raise DeviceError(
|
||||
_('Could not find mount helper: %s.')%cmd[0])
|
||||
while p.poll() is None:
|
||||
time.sleep(0.1)
|
||||
self._card_a_vol.Unmount([])
|
||||
except dbus.exceptions.DBusException, e:
|
||||
print 'Unable to eject ', e
|
||||
|
||||
if self._card_b_prefix:
|
||||
#print "umount card b:", cmd, self._card_b_dev, self._card_b_prefix
|
||||
if verbose:
|
||||
print "FBSD: umount card b:", self._card_b_prefix
|
||||
try:
|
||||
p = subprocess.Popen(cmd + [self._card_b_dev, self._card_b_prefix])
|
||||
except OSError:
|
||||
raise DeviceError(
|
||||
_('Could not find mount helper: %s.')%cmd[0])
|
||||
while p.poll() is None:
|
||||
time.sleep(0.1)
|
||||
self._card_b_vol.Unmount([])
|
||||
except dbus.exceptions.DBusException, e:
|
||||
print 'Unable to eject ', e
|
||||
|
||||
self._main_prefix = None
|
||||
self._card_a_prefix = None
|
||||
@ -839,11 +859,10 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
time.sleep(7)
|
||||
self.open_linux()
|
||||
if isfreebsd:
|
||||
self._main_dev = self._card_a_dev = self._card_b_dev = None
|
||||
self._main_vol = self._card_a_vol = self._card_b_vol = None
|
||||
try:
|
||||
self.open_freebsd()
|
||||
except DeviceError:
|
||||
subprocess.Popen(["camcontrol", "rescan", "all"])
|
||||
time.sleep(2)
|
||||
self.open_freebsd()
|
||||
if iswindows:
|
||||
|
@ -10,7 +10,7 @@ driver. It is intended to be subclassed with the relevant parts implemented
|
||||
for a particular device.
|
||||
'''
|
||||
|
||||
import os, re, time, json, uuid, functools, shutil
|
||||
import os, re, time, json, functools, shutil
|
||||
from itertools import cycle
|
||||
|
||||
from calibre.constants import numeric_version
|
||||
@ -58,6 +58,7 @@ class USBMS(CLI, Device):
|
||||
SCAN_FROM_ROOT = False
|
||||
|
||||
def _update_driveinfo_record(self, dinfo, prefix, location_code, name=None):
|
||||
import uuid
|
||||
if not isinstance(dinfo, dict):
|
||||
dinfo = {}
|
||||
if dinfo.get('device_store_uuid', None) is None:
|
||||
|
@ -90,6 +90,10 @@ class USER_DEFINED(USBMS):
|
||||
OPT_CARD_A_FOLDER = 9
|
||||
|
||||
def initialize(self):
    '''
    Mark the plugin as needing delayed initialization, then run the
    USBMS base-class initialization.
    '''
    # The flag is set before delegating to the base class.
    self.plugin_needs_delayed_initialization = True
    USBMS.initialize(self)
|
||||
|
||||
def do_delayed_plugin_initialization(self):
|
||||
try:
|
||||
e = self.settings().extra_customization
|
||||
self.VENDOR_ID = int(e[self.OPT_USB_VENDOR_ID], 16)
|
||||
@ -107,4 +111,6 @@ class USER_DEFINED(USBMS):
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
USBMS.initialize(self)
|
||||
self.plugin_needs_delayed_initialization = False
|
||||
|
||||
|
||||
|
@ -8,7 +8,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, codecs
|
||||
from chardet import detect
|
||||
|
||||
ENCODING_PATS = [
|
||||
re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
|
||||
@ -34,8 +33,13 @@ def substitute_entites(raw):
|
||||
_CHARSET_ALIASES = { "macintosh" : "mac-roman",
|
||||
"x-sjis" : "shift-jis" }
|
||||
|
||||
def detect(*args, **kwargs):
    '''
    Forward all positional and keyword arguments to chardet's ``detect``
    and return whatever it returns.

    chardet is imported inside the function body, so the import only
    happens when this function is called, not when the module is loaded.
    '''
    import chardet
    return chardet.detect(*args, **kwargs)
|
||||
|
||||
def force_encoding(raw, verbose, assume_utf8=False):
|
||||
from calibre.constants import preferred_encoding
|
||||
|
||||
try:
|
||||
chardet = detect(raw[:1024*50])
|
||||
except:
|
||||
|
@ -7,11 +7,10 @@ __docformat__ = 'restructuredtext en'
|
||||
Based on ideas from comiclrf created by FangornUK.
|
||||
'''
|
||||
|
||||
import os, shutil, traceback, textwrap, time, codecs
|
||||
import os, traceback, time
|
||||
from Queue import Empty
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre import extract, CurrentDir, prints, walk
|
||||
from calibre import extract, prints, walk
|
||||
from calibre.constants import filesystem_encoding
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.ipc.server import Server
|
||||
@ -273,245 +272,4 @@ def process_pages(pages, opts, update, tdir):
|
||||
return ans, failures
|
||||
|
||||
|
||||
class ComicInput(InputFormatPlugin):
|
||||
|
||||
name = 'Comic Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
|
||||
file_types = set(['cbz', 'cbr', 'cbc'])
|
||||
is_image_collection = True
|
||||
core_usage = -1
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='colors', recommended_value=256,
|
||||
help=_('Number of colors for grayscale image conversion. Default: '
|
||||
'%default. Values of less than 256 may result in blurred text '
|
||||
'on your device if you are creating your comics in EPUB format.')),
|
||||
OptionRecommendation(name='dont_normalize', recommended_value=False,
|
||||
help=_('Disable normalize (improve contrast) color range '
|
||||
'for pictures. Default: False')),
|
||||
OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
|
||||
help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
|
||||
OptionRecommendation(name='dont_sharpen', recommended_value=False,
|
||||
help=_('Disable sharpening.')),
|
||||
OptionRecommendation(name='disable_trim', recommended_value=False,
|
||||
help=_('Disable trimming of comic pages. For some comics, '
|
||||
'trimming might remove content as well as borders.')),
|
||||
OptionRecommendation(name='landscape', recommended_value=False,
|
||||
help=_("Don't split landscape images into two portrait images")),
|
||||
OptionRecommendation(name='wide', recommended_value=False,
|
||||
help=_("Keep aspect ratio and scale image using screen height as "
|
||||
"image width for viewing in landscape mode.")),
|
||||
OptionRecommendation(name='right2left', recommended_value=False,
|
||||
help=_('Used for right-to-left publications like manga. '
|
||||
'Causes landscape pages to be split into portrait pages '
|
||||
'from right to left.')),
|
||||
OptionRecommendation(name='despeckle', recommended_value=False,
|
||||
help=_('Enable Despeckle. Reduces speckle noise. '
|
||||
'May greatly increase processing time.')),
|
||||
OptionRecommendation(name='no_sort', recommended_value=False,
|
||||
help=_("Don't sort the files found in the comic "
|
||||
"alphabetically by name. Instead use the order they were "
|
||||
"added to the comic.")),
|
||||
OptionRecommendation(name='output_format', choices=['png', 'jpg'],
|
||||
recommended_value='png', help=_('The format that images in the created ebook '
|
||||
'are converted to. You can experiment to see which format gives '
|
||||
'you optimal size and look on your device.')),
|
||||
OptionRecommendation(name='no_process', recommended_value=False,
|
||||
help=_("Apply no processing to the image")),
|
||||
OptionRecommendation(name='dont_grayscale', recommended_value=False,
|
||||
help=_('Do not convert the image to grayscale (black and white)')),
|
||||
OptionRecommendation(name='comic_image_size', recommended_value=None,
|
||||
help=_('Specify the image size as widthxheight pixels. Normally,'
|
||||
' an image size is automatically calculated from the output '
|
||||
'profile, this option overrides it.')),
|
||||
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
|
||||
help=_('When converting a CBC do not add links to each page to'
|
||||
' the TOC. Note this only applies if the TOC has more than one'
|
||||
' section')),
|
||||
])
|
||||
|
||||
recommendations = set([
|
||||
('margin_left', 0, OptionRecommendation.HIGH),
|
||||
('margin_top', 0, OptionRecommendation.HIGH),
|
||||
('margin_right', 0, OptionRecommendation.HIGH),
|
||||
('margin_bottom', 0, OptionRecommendation.HIGH),
|
||||
('insert_blank_line', False, OptionRecommendation.HIGH),
|
||||
('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
|
||||
('change_justification', 'left', OptionRecommendation.HIGH),
|
||||
('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
|
||||
('chapter', None, OptionRecommendation.HIGH),
|
||||
('page_breaks_brefore', None, OptionRecommendation.HIGH),
|
||||
('use_auto_toc', False, OptionRecommendation.HIGH),
|
||||
('page_breaks_before', None, OptionRecommendation.HIGH),
|
||||
('disable_font_rescaling', True, OptionRecommendation.HIGH),
|
||||
('linearize_tables', False, OptionRecommendation.HIGH),
|
||||
])
|
||||
|
||||
def get_comics_from_collection(self, stream):
|
||||
from calibre.libunzip import extract as zipextract
|
||||
tdir = PersistentTemporaryDirectory('_comic_collection')
|
||||
zipextract(stream, tdir)
|
||||
comics = []
|
||||
with CurrentDir(tdir):
|
||||
if not os.path.exists('comics.txt'):
|
||||
raise ValueError((
|
||||
'%s is not a valid comic collection'
|
||||
' no comics.txt was found in the file')
|
||||
%stream.name)
|
||||
raw = open('comics.txt', 'rb').read()
|
||||
if raw.startswith(codecs.BOM_UTF16_BE):
|
||||
raw = raw.decode('utf-16-be')[1:]
|
||||
elif raw.startswith(codecs.BOM_UTF16_LE):
|
||||
raw = raw.decode('utf-16-le')[1:]
|
||||
elif raw.startswith(codecs.BOM_UTF8):
|
||||
raw = raw.decode('utf-8')[1:]
|
||||
else:
|
||||
raw = raw.decode('utf-8')
|
||||
for line in raw.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
fname, title = line.partition(':')[0], line.partition(':')[-1]
|
||||
fname = fname.replace('#', '_')
|
||||
fname = os.path.join(tdir, *fname.split('/'))
|
||||
if not title:
|
||||
title = os.path.basename(fname).rpartition('.')[0]
|
||||
if os.access(fname, os.R_OK):
|
||||
comics.append([title, fname])
|
||||
if not comics:
|
||||
raise ValueError('%s has no comics'%stream.name)
|
||||
return comics
|
||||
|
||||
def get_pages(self, comic, tdir2):
|
||||
tdir = extract_comic(comic)
|
||||
new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
|
||||
verbose=self.opts.verbose)
|
||||
thumbnail = None
|
||||
if not new_pages:
|
||||
raise ValueError('Could not find any pages in the comic: %s'
|
||||
%comic)
|
||||
if self.opts.no_process:
|
||||
n2 = []
|
||||
for page in new_pages:
|
||||
n2.append(os.path.join(tdir2, os.path.basename(page)))
|
||||
shutil.copyfile(page, n2[-1])
|
||||
new_pages = n2
|
||||
else:
|
||||
new_pages, failures = process_pages(new_pages, self.opts,
|
||||
self.report_progress, tdir2)
|
||||
if failures:
|
||||
self.log.warning('Could not process the following pages '
|
||||
'(run with --verbose to see why):')
|
||||
for f in failures:
|
||||
self.log.warning('\t', f)
|
||||
if not new_pages:
|
||||
raise ValueError('Could not find any valid pages in comic: %s'
|
||||
% comic)
|
||||
thumbnail = os.path.join(tdir2,
|
||||
'thumbnail.'+self.opts.output_format.lower())
|
||||
if not os.access(thumbnail, os.R_OK):
|
||||
thumbnail = None
|
||||
return new_pages
|
||||
|
||||
def get_images(self):
|
||||
return self._images
|
||||
|
||||
def convert(self, stream, opts, file_ext, log, accelerators):
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
|
||||
self.opts, self.log= opts, log
|
||||
if file_ext == 'cbc':
|
||||
comics_ = self.get_comics_from_collection(stream)
|
||||
else:
|
||||
comics_ = [['Comic', os.path.abspath(stream.name)]]
|
||||
stream.close()
|
||||
comics = []
|
||||
for i, x in enumerate(comics_):
|
||||
title, fname = x
|
||||
cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
|
||||
cdir = os.path.abspath(cdir)
|
||||
if not os.path.exists(cdir):
|
||||
os.makedirs(cdir)
|
||||
pages = self.get_pages(fname, cdir)
|
||||
if not pages: continue
|
||||
wrappers = self.create_wrappers(pages)
|
||||
comics.append((title, pages, wrappers))
|
||||
|
||||
if not comics:
|
||||
raise ValueError('No comic pages found in %s'%stream.name)
|
||||
|
||||
mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
|
||||
[_('Unknown')])
|
||||
opf = OPFCreator(os.path.abspath('.'), mi)
|
||||
entries = []
|
||||
|
||||
def href(x):
|
||||
if len(comics) == 1: return os.path.basename(x)
|
||||
return '/'.join(x.split(os.sep)[-2:])
|
||||
|
||||
for comic in comics:
|
||||
pages, wrappers = comic[1:]
|
||||
entries += [(w, None) for w in map(href, wrappers)] + \
|
||||
[(x, None) for x in map(href, pages)]
|
||||
opf.create_manifest(entries)
|
||||
spine = []
|
||||
for comic in comics:
|
||||
spine.extend(map(href, comic[2]))
|
||||
self._images = []
|
||||
for comic in comics:
|
||||
self._images.extend(comic[1])
|
||||
opf.create_spine(spine)
|
||||
toc = TOC()
|
||||
if len(comics) == 1:
|
||||
wrappers = comics[0][2]
|
||||
for i, x in enumerate(wrappers):
|
||||
toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
|
||||
play_order=i)
|
||||
else:
|
||||
po = 0
|
||||
for comic in comics:
|
||||
po += 1
|
||||
wrappers = comic[2]
|
||||
stoc = toc.add_item(href(wrappers[0]),
|
||||
None, comic[0], play_order=po)
|
||||
if not opts.dont_add_comic_pages_to_toc:
|
||||
for i, x in enumerate(wrappers):
|
||||
stoc.add_item(href(x), None,
|
||||
_('Page')+' %d'%(i+1), play_order=po)
|
||||
po += 1
|
||||
opf.set_toc(toc)
|
||||
m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
|
||||
opf.render(m, n, 'toc.ncx')
|
||||
return os.path.abspath('metadata.opf')
|
||||
|
||||
def create_wrappers(self, pages):
|
||||
from calibre.ebooks.oeb.base import XHTML_NS
|
||||
wrappers = []
|
||||
WRAPPER = textwrap.dedent('''\
|
||||
<html xmlns="%s">
|
||||
<head>
|
||||
<title>Page #%d</title>
|
||||
<style type="text/css">
|
||||
@page { margin:0pt; padding: 0pt}
|
||||
body { margin: 0pt; padding: 0pt}
|
||||
div { text-align: center }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<img src="%s" alt="comic page #%d" />
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
''')
|
||||
dir = os.path.dirname(pages[0])
|
||||
for i, page in enumerate(pages):
|
||||
wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
|
||||
page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
|
||||
open(page, 'wb').write(wrapper)
|
||||
wrappers.append(page)
|
||||
return wrappers
|
||||
|
||||
|
11
src/calibre/ebooks/conversion/plugins/__init__.py
Normal file
11
src/calibre/ebooks/conversion/plugins/__init__.py
Normal file
@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
|
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.azw4.reader import Reader
|
||||
|
||||
class AZW4Input(InputFormatPlugin):
|
||||
|
||||
@ -19,6 +17,9 @@ class AZW4Input(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.azw4.reader import Reader
|
||||
|
||||
header = PdbHeaderReader(stream)
|
||||
reader = Reader(header, stream, log, options)
|
||||
opf = reader.extract_content(os.getcwd())
|
@ -3,9 +3,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||
|
||||
import os, uuid
|
||||
|
||||
from lxml import html
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
@ -77,7 +75,7 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
|
||||
# use HTMLInput plugin to generate book
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.customize.builtins import HTMLInput
|
||||
opts.breadth_first = True
|
||||
htmlinput = HTMLInput(None)
|
||||
oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
|
||||
@ -85,6 +83,8 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
|
||||
def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
|
||||
import uuid
|
||||
from lxml import html
|
||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||
from calibre.ebooks.oeb.base import DirContainer
|
||||
oeb = create_oebbook(log, None, opts,
|
||||
@ -142,6 +142,7 @@ class CHMInput(InputFormatPlugin):
|
||||
return oeb
|
||||
|
||||
def _create_html_root(self, hhcpath, log):
|
||||
from lxml import html
|
||||
hhcdata = self._read_file(hhcpath)
|
||||
hhcroot = html.fromstring(hhcdata)
|
||||
chapters = self._process_nodes(hhcroot)
|
259
src/calibre/ebooks/conversion/plugins/comic_input.py
Normal file
259
src/calibre/ebooks/conversion/plugins/comic_input.py
Normal file
@ -0,0 +1,259 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Based on ideas from comiclrf created by FangornUK.
|
||||
'''
|
||||
|
||||
import shutil, textwrap, codecs, os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre import CurrentDir
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
|
||||
class ComicInput(InputFormatPlugin):
|
||||
|
||||
name = 'Comic Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
|
||||
file_types = set(['cbz', 'cbr', 'cbc'])
|
||||
is_image_collection = True
|
||||
core_usage = -1
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='colors', recommended_value=256,
|
||||
help=_('Number of colors for grayscale image conversion. Default: '
|
||||
'%default. Values of less than 256 may result in blurred text '
|
||||
'on your device if you are creating your comics in EPUB format.')),
|
||||
OptionRecommendation(name='dont_normalize', recommended_value=False,
|
||||
help=_('Disable normalize (improve contrast) color range '
|
||||
'for pictures. Default: False')),
|
||||
OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
|
||||
help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
|
||||
OptionRecommendation(name='dont_sharpen', recommended_value=False,
|
||||
help=_('Disable sharpening.')),
|
||||
OptionRecommendation(name='disable_trim', recommended_value=False,
|
||||
help=_('Disable trimming of comic pages. For some comics, '
|
||||
'trimming might remove content as well as borders.')),
|
||||
OptionRecommendation(name='landscape', recommended_value=False,
|
||||
help=_("Don't split landscape images into two portrait images")),
|
||||
OptionRecommendation(name='wide', recommended_value=False,
|
||||
help=_("Keep aspect ratio and scale image using screen height as "
|
||||
"image width for viewing in landscape mode.")),
|
||||
OptionRecommendation(name='right2left', recommended_value=False,
|
||||
help=_('Used for right-to-left publications like manga. '
|
||||
'Causes landscape pages to be split into portrait pages '
|
||||
'from right to left.')),
|
||||
OptionRecommendation(name='despeckle', recommended_value=False,
|
||||
help=_('Enable Despeckle. Reduces speckle noise. '
|
||||
'May greatly increase processing time.')),
|
||||
OptionRecommendation(name='no_sort', recommended_value=False,
|
||||
help=_("Don't sort the files found in the comic "
|
||||
"alphabetically by name. Instead use the order they were "
|
||||
"added to the comic.")),
|
||||
OptionRecommendation(name='output_format', choices=['png', 'jpg'],
|
||||
recommended_value='png', help=_('The format that images in the created ebook '
|
||||
'are converted to. You can experiment to see which format gives '
|
||||
'you optimal size and look on your device.')),
|
||||
OptionRecommendation(name='no_process', recommended_value=False,
|
||||
help=_("Apply no processing to the image")),
|
||||
OptionRecommendation(name='dont_grayscale', recommended_value=False,
|
||||
help=_('Do not convert the image to grayscale (black and white)')),
|
||||
OptionRecommendation(name='comic_image_size', recommended_value=None,
|
||||
help=_('Specify the image size as widthxheight pixels. Normally,'
|
||||
' an image size is automatically calculated from the output '
|
||||
'profile, this option overrides it.')),
|
||||
OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
|
||||
help=_('When converting a CBC do not add links to each page to'
|
||||
' the TOC. Note this only applies if the TOC has more than one'
|
||||
' section')),
|
||||
])
|
||||
|
||||
recommendations = set([
|
||||
('margin_left', 0, OptionRecommendation.HIGH),
|
||||
('margin_top', 0, OptionRecommendation.HIGH),
|
||||
('margin_right', 0, OptionRecommendation.HIGH),
|
||||
('margin_bottom', 0, OptionRecommendation.HIGH),
|
||||
('insert_blank_line', False, OptionRecommendation.HIGH),
|
||||
('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
|
||||
('change_justification', 'left', OptionRecommendation.HIGH),
|
||||
('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
|
||||
('chapter', None, OptionRecommendation.HIGH),
|
||||
('page_breaks_brefore', None, OptionRecommendation.HIGH),
|
||||
('use_auto_toc', False, OptionRecommendation.HIGH),
|
||||
('page_breaks_before', None, OptionRecommendation.HIGH),
|
||||
('disable_font_rescaling', True, OptionRecommendation.HIGH),
|
||||
('linearize_tables', False, OptionRecommendation.HIGH),
|
||||
])
|
||||
|
||||
def get_comics_from_collection(self, stream):
    '''
    Extract a comic collection and list the comics named in its
    ``comics.txt`` index.

    The archive in ``stream`` is unpacked into a persistent temporary
    directory. Each non-blank line of ``comics.txt`` is read as
    ``filename`` or ``filename:title``; ``#`` in the file name is replaced
    by ``_`` and ``/`` is treated as the path separator relative to the
    extraction directory. When no title is given, the file's base name
    without its extension is used.

    :param stream: Open file object of the collection. ``stream.name`` is
                   used in error messages.
    :return: List of ``[title, path]`` entries, in index order, for the
             listed files that are readable.
    :raises ValueError: If ``comics.txt`` is missing, or if none of the
                        listed comics is readable.
    '''
    from calibre.libunzip import extract as zipextract
    tdir = PersistentTemporaryDirectory('_comic_collection')
    zipextract(stream, tdir)
    comics = []
    with CurrentDir(tdir):
        if not os.path.exists('comics.txt'):
            raise ValueError((
                '%s is not a valid comic collection'
                ' no comics.txt was found in the file')
                    %stream.name)
        # Read through a context manager so the handle is closed right
        # away instead of whenever the file object is garbage collected.
        with open('comics.txt', 'rb') as index:
            raw = index.read()
        # Honour a byte order mark when present (and drop the decoded BOM
        # character); without one the index is assumed to be UTF-8.
        if raw.startswith(codecs.BOM_UTF16_BE):
            raw = raw.decode('utf-16-be')[1:]
        elif raw.startswith(codecs.BOM_UTF16_LE):
            raw = raw.decode('utf-16-le')[1:]
        elif raw.startswith(codecs.BOM_UTF8):
            raw = raw.decode('utf-8')[1:]
        else:
            raw = raw.decode('utf-8')
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            # Split once on the first ':'; title is '' when there is none.
            fname, sep, title = line.partition(':')
            fname = fname.replace('#', '_')
            fname = os.path.join(tdir, *fname.split('/'))
            if not title:
                title = os.path.basename(fname).rpartition('.')[0]
            # Entries that are not readable are silently skipped.
            if os.access(fname, os.R_OK):
                comics.append([title, fname])
    if not comics:
        raise ValueError('%s has no comics'%stream.name)
    return comics
|
||||
|
||||
def get_pages(self, comic, tdir2):
    '''
    Extract ``comic`` and return the page image files to use for it.

    When ``self.opts.no_process`` is set the pages found are copied into
    ``tdir2`` unchanged; otherwise they are passed to ``process_pages``,
    which is given ``tdir2`` as its output directory. Pages that could not
    be processed are reported through ``self.log.warning``.

    :param comic: Path of the comic file to extract.
    :param tdir2: Directory that receives the copied/processed pages.
    :return: List of page paths (non-empty).
    :raises ValueError: If the comic contains no pages, or if processing
                        leaves no usable pages.
    '''
    from calibre.ebooks.comic.input import (extract_comic, process_pages,
            find_pages)
    tdir = extract_comic(comic)
    new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
            verbose=self.opts.verbose)
    if not new_pages:
        raise ValueError('Could not find any pages in the comic: %s'
                %comic)
    if self.opts.no_process:
        # Copy the pages verbatim, keeping only their base names.
        copied = []
        for page in new_pages:
            dest = os.path.join(tdir2, os.path.basename(page))
            shutil.copyfile(page, dest)
            copied.append(dest)
        new_pages = copied
    else:
        new_pages, failures = process_pages(new_pages, self.opts,
                self.report_progress, tdir2)
        if failures:
            self.log.warning('Could not process the following pages '
                '(run with --verbose to see why):')
            for f in failures:
                self.log.warning('\t', f)
        if not new_pages:
            raise ValueError('Could not find any valid pages in comic: %s'
                    % comic)
    # The former ``thumbnail`` local (a path built in tdir2 and probed
    # with os.access) was never used or returned, so it has been removed.
    return new_pages
|
||||
|
||||
def get_images(self):
    '''
    Return the object stored in ``self._images`` (the list of page image
    paths that ``convert`` collects).
    '''
    images = self._images
    return images
|
||||
|
||||
def convert(self, stream, opts, file_ext, log, accelerators):
    '''
    Convert a comic file or comic collection into an OPF based book in
    the current working directory.

    For ``cbc`` input every comic listed in the collection is unpacked
    into its own ``comic_N`` sub-directory; any other input is handled as
    a single comic placed directly in the current directory. An XHTML
    wrapper is created for each page, then a manifest, spine and table of
    contents are built and written to ``metadata.opf`` and ``toc.ncx``.
    The page image paths are stored in ``self._images``.

    :param stream: Open file object of the input; it is closed here and
                   ``stream.name`` is used for the title and messages.
    :param opts: Conversion options (stored as ``self.opts``).
    :param file_ext: Input file extension; ``'cbc'`` selects collection
                     handling.
    :param log: Logger (stored as ``self.log``).
    :param accelerators: Not used by this method.
    :return: Absolute path of the generated ``metadata.opf``.
    :raises ValueError: If no comic pages were found.
    '''
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log= opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()
    comics = []
    for i, (title, fname) in enumerate(comics_):
        # A lone comic goes straight into the current directory.
        cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s'%stream.name)

    mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
        [_('Unknown')])
    opf = OPFCreator(os.path.abspath('.'), mi)

    def href(x):
        # One comic: files sit next to the OPF, so the base name suffices.
        # Several comics: keep the comic_N directory component as well.
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    entries = []
    spine = []
    self._images = []
    for title, pages, wrappers in comics:
        # Manifest order per comic: wrappers first, then the page images.
        entries.extend((href(w), None) for w in wrappers)
        entries.extend((href(p), None) for p in pages)
        spine.extend(href(w) for w in wrappers)
        self._images.extend(pages)
    opf.create_manifest(entries)
    opf.create_spine(spine)

    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                    play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]),
                    None, comic[0], play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                            _('Page')+' %d'%(i+1), play_order=po)
                    po += 1
    opf.set_toc(toc)
    # Close both output files before returning: the caller is handed the
    # path of metadata.opf, so its contents must be flushed to disk and
    # not left to whenever the file objects get garbage collected.
    with open('metadata.opf', 'wb') as m:
        with open('toc.ncx', 'wb') as n:
            opf.render(m, n, 'toc.ncx')
    return os.path.abspath('metadata.opf')
|
||||
|
||||
def create_wrappers(self, pages):
    """Write one XHTML wrapper file per page image.

    Each wrapper is named ``page_<n>.xhtml`` (1-based), is placed in the
    directory of the first page and references its image by base name.

    :param pages: list of page image paths.
    :return: list of wrapper file paths, in page order (empty if
        ``pages`` is empty).
    """
    from calibre.ebooks.oeb.base import XHTML_NS
    WRAPPER = textwrap.dedent('''\
    <html xmlns="%s">
    <head>
    <title>Page #%d</title>
    <style type="text/css">
    @page { margin:0pt; padding: 0pt}
    body { margin: 0pt; padding: 0pt}
    div { text-align: center }
    </style>
    </head>
    <body>
    <div>
    <img src="%s" alt="comic page #%d" />
    </div>
    </body>
    </html>
    ''')
    wrappers = []
    if not pages:
        # Nothing to wrap; avoids an IndexError on pages[0] below.
        return wrappers
    base_dir = os.path.dirname(pages[0])
    for i, page in enumerate(pages):
        wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
        wrapper_path = os.path.join(base_dir, 'page_%d.xhtml'%(i+1))
        # Context manager so the handle is closed as soon as it is written.
        with open(wrapper_path, 'wb') as f:
            f.write(wrapper)
        wrappers.append(wrapper_path)
    return wrappers
|
||||
|
@ -12,7 +12,6 @@ from subprocess import Popen, PIPE
|
||||
from cStringIO import StringIO
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.txt.processor import convert_basic
|
||||
|
||||
class DJVUInput(InputFormatPlugin):
|
||||
|
||||
@ -28,6 +27,8 @@ class DJVUInput(InputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.ebooks.txt.processor import convert_basic
|
||||
|
||||
stdout = StringIO()
|
||||
ppdjvu = True
|
||||
# using djvutxt is MUCH faster, should make it an option
|
@ -3,11 +3,9 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, uuid
|
||||
import os
|
||||
from itertools import cycle
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
|
||||
class EPUBInput(InputFormatPlugin):
|
||||
@ -30,6 +28,8 @@ class EPUBInput(InputFormatPlugin):
|
||||
f.write(raw[1024:])
|
||||
|
||||
def process_encryption(self, encfile, opf, log):
|
||||
from lxml import etree
|
||||
import uuid
|
||||
key = None
|
||||
for item in opf.identifier_iter():
|
||||
scheme = None
|
||||
@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
return False
|
||||
|
||||
def rationalize_cover(self, opf, log):
|
||||
from lxml import etree
|
||||
guide_cover, guide_elem = None, None
|
||||
for guide_elem in opf.iterguide():
|
||||
if guide_elem.get('type', '').lower() == 'cover':
|
||||
@ -110,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
renderer)
|
||||
|
||||
def find_opf(self):
|
||||
from lxml import etree
|
||||
def attr(n, attr):
|
||||
for k, v in n.attrib.items():
|
||||
if k.endswith(attr):
|
@ -8,14 +8,12 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, shutil, re
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre import CurrentDir
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.constants import filesystem_encoding
|
||||
|
||||
from lxml import etree
|
||||
|
||||
block_level_tags = (
|
||||
'address',
|
||||
'body',
|
||||
@ -289,6 +287,7 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
# }}}
|
||||
|
||||
def condense_ncx(self, ncx_path):
|
||||
from lxml import etree
|
||||
if not self.opts.pretty_print:
|
||||
tree = etree.parse(ncx_path)
|
||||
for tag in tree.getroot().iter(tag=etree.Element):
|
@ -6,7 +6,6 @@ Convert .fb2 files to .lrf
|
||||
"""
|
||||
import os, re
|
||||
from base64 import b64decode
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre import guess_type
|
||||
@ -38,6 +37,7 @@ class FB2Input(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.fb2.fb2ml import FB2MLizer
|
||||
|
||||
class FB2Output(OutputFormatPlugin):
|
||||
|
||||
@ -162,6 +161,7 @@ class FB2Output(OutputFormatPlugin):
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
|
||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
||||
from calibre.ebooks.fb2.fb2ml import FB2MLizer
|
||||
|
||||
try:
|
||||
rasterizer = SVGRasterizer()
|
283
src/calibre/ebooks/conversion/plugins/html_input.py
Normal file
283
src/calibre/ebooks/conversion/plugins/html_input.py
Normal file
@ -0,0 +1,283 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, tempfile, os
|
||||
from functools import partial
|
||||
from itertools import izip
|
||||
from urllib import quote
|
||||
|
||||
from calibre.constants import islinux, isbsd
|
||||
from calibre.customize.conversion import (InputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
|
||||
|
||||
class HTMLInput(InputFormatPlugin):

    # Plugin metadata.
    name        = 'HTML Input'
    author      = 'Kovid Goyal'
    description = 'Convert HTML and OPF files to an OEB'
    # Input file extensions handled by this plugin.
    file_types  = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])

    # Conversion options specific to HTML input.
    options = set([
        OptionRecommendation(
            name='breadth_first',
            recommended_value=False,
            level=OptionRecommendation.LOW,
            help=_('Traverse links in HTML files breadth first. Normally, '
                   'they are traversed depth first.'
                   )
        ),

        OptionRecommendation(
            name='max_levels',
            recommended_value=5,
            level=OptionRecommendation.LOW,
            help=_('Maximum levels of recursion when following links in '
                   'HTML files. Must be non-negative. 0 implies that no '
                   'links in the root HTML file are followed. Default is '
                   '%default.'
                   )
        ),

        OptionRecommendation(
            name='dont_package',
            recommended_value=False,
            level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                   'files into a standard folder hierarchy. Only use this option '
                   'if you know what you are doing as it can result in various '
                   'nasty side effects in the rest of the conversion pipeline.'
                   )
        ),

    ])
|
||||
|
||||
def convert(self, stream, opts, file_ext, log,
        accelerators):
    """Entry point: build an OEB book from an HTML or OPF input stream.

    OPF input is handed directly to the plumber's ``create_oebbook``.
    HTML input has its metadata read (from the markup and, when the
    stream has a name, from the file name) and is then processed by
    ``self.create_oebbook``.

    :raises ValueError: if ``opts.dont_package`` is set for HTML input.
    """
    # Reset the cached file system case-sensitivity probe for this run.
    self._is_case_sensitive = None
    basedir = os.getcwd()
    self.opts = opts

    fname = None
    if hasattr(stream, 'name'):
        basedir = os.path.dirname(stream.name)
        fname = os.path.basename(stream.name)

    if file_ext == 'opf':
        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    # Everything below handles plain HTML input.
    if opts.dont_package:
        raise ValueError('The --dont-package option is not supported for an HTML input file')
    from calibre.ebooks.metadata.html import get_metadata
    mi = get_metadata(stream)
    if fname:
        from calibre.ebooks.metadata.meta import metadata_from_filename
        # File-name metadata is the base; values read from the HTML
        # itself are merged on top of it.
        name_mi = metadata_from_filename(fname)
        name_mi.smart_update(mi)
        mi = name_mi
    return self.create_oebbook(stream.name, basedir, opts, log, mi)
|
||||
|
||||
def is_case_sensitive(self, path):
    """Report whether the file system holding ``path`` is case sensitive.

    The answer is cached on ``self._is_case_sensitive`` after the first
    successful probe.  If ``path`` is empty or does not exist, no probe is
    possible and ``islinux or isbsd`` is returned without caching.
    """
    cached = getattr(self, '_is_case_sensitive', None)
    if cached is not None:
        return cached
    if not path or not os.path.exists(path):
        return islinux or isbsd
    # If both the lower- and upper-cased spellings resolve, the file
    # system is treated as case insensitive.
    both_spellings_exist = (os.path.exists(path.lower())
            and os.path.exists(path.upper()))
    self._is_case_sensitive = not both_spellings_exist
    return self._is_case_sensitive
|
||||
|
||||
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
    """Build an OEB book from an HTML file and the files it links to.

    Steps, in order: create an unpopulated OEB book, fill in metadata
    (adding defaults for missing language/creator/title and a fresh uuid
    identifier), add every non-binary file returned by ``get_filelist``
    to the manifest and spine, rewrite links in the HTML and CSS through
    ``resource_adder`` (which pulls in referenced resources), and finally
    generate a flat table of contents from titles or headings.

    :param htmlpath: path of the root HTML file.
    :param basedir: base directory passed to ``get_filelist``.
    :param opts: conversion options.
    :param log: logger.
    :param mi: metadata to apply to the book.
    :return: the populated OEB book.
    """
    import uuid
    from calibre.ebooks.conversion.plumber import create_oebbook
    from calibre.ebooks.oeb.base import (DirContainer,
        rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
        xpath)
    from calibre import guess_type
    from calibre.ebooks.oeb.transforms.metadata import \
        meta_info_to_oeb_metadata
    from calibre.ebooks.html.input import get_filelist
    import cssutils, logging
    cssutils.log.setLevel(logging.WARN)
    self.OEB_STYLES = OEB_STYLES
    oeb = create_oebbook(log, None, opts, self,
            encoding=opts.input_encoding, populate=False)
    self.oeb = oeb

    metadata = oeb.metadata
    meta_info_to_oeb_metadata(mi, metadata, log)
    if not metadata.language:
        oeb.logger.warn(u'Language not specified')
        metadata.add('language', get_lang().replace('_', '-'))
    if not metadata.creator:
        oeb.logger.warn('Creator not specified')
        metadata.add('creator', self.oeb.translate(__('Unknown')))
    if not metadata.title:
        oeb.logger.warn('Title not specified')
        metadata.add('title', self.oeb.translate(__('Unknown')))
    bookid = str(uuid.uuid4())
    metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
    for ident in metadata.identifier:
        if 'id' in ident.attrib:
            # Use the identifier that actually carries an id attribute,
            # not blindly the first one in the list.
            self.oeb.uid = ident
            break

    filelist = get_filelist(htmlpath, basedir, opts, log)
    filelist = [f for f in filelist if not f.is_binary]
    htmlfile_map = {}
    for f in filelist:
        path = f.path
        oeb.container = DirContainer(os.path.dirname(path), log,
                ignore_opf=True)
        bname = os.path.basename(path)
        item_id, href = oeb.manifest.generate(id='html',
                href=ascii_filename(bname))
        htmlfile_map[path] = href
        item = oeb.manifest.add(item_id, href, 'text/html')
        item.html_input_href = bname
        oeb.spine.add(item, True)

    self.added_resources = {}
    self.log = log
    self.log('Normalizing filename cases')
    for path, href in htmlfile_map.items():
        if not self.is_case_sensitive(path):
            path = path.lower()
        self.added_resources[path] = href
    # Stash the helpers imported above so resource_adder() can use them.
    self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
    self.urldefrag = urldefrag
    self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

    self.log('Rewriting HTML links')
    for f in filelist:
        path = f.path
        dpath = os.path.dirname(path)
        oeb.container = DirContainer(dpath, log, ignore_opf=True)
        item = oeb.manifest.hrefs[htmlfile_map[path]]
        rewrite_links(item.data, partial(self.resource_adder, base=dpath))

    for item in oeb.manifest.values():
        if item.media_type in self.OEB_STYLES:
            # Resolve url() references relative to the directory the
            # stylesheet was originally loaded from, if known.
            dpath = None
            for path, href in self.added_resources.items():
                if href == item.href:
                    dpath = os.path.dirname(path)
                    break
            cssutils.replaceUrls(item.data,
                    partial(self.resource_adder, base=dpath))

    toc = self.oeb.toc
    self.oeb.auto_generated_toc = True
    # Collect exactly one title and one header per linear spine item so
    # that labels can never drift out of step with the items they name.
    linear_items = [item for item in self.oeb.spine if item.linear]
    titles = []
    headers = []
    for item in linear_items:
        html = item.data
        title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
        title = re.sub(r'\s+', ' ', title.strip())
        titles.append(title)
        headers.append('(unlabled)')
        for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
            expr = '/h:html/h:body//h:%s[position()=1]/text()'
            header = ''.join(xpath(html, expr % tag))
            header = re.sub(r'\s+', ' ', header.strip())
            if header:
                headers[-1] = header
                break
    # Titles are only usable when every file has one and none repeat;
    # otherwise fall back to the first heading of each file.
    use = titles
    if not all(titles) or len(titles) > len(set(titles)):
        use = headers
    for title, item in izip(use, linear_items):
        toc.add(title, item.href)

    oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
    return oeb
|
||||
|
||||
def link_to_local_path(self, link_, base=None):
    """Resolve a link to a local file system path and a fragment.

    :param link_: the link text; byte strings are decoded as UTF-8.
    :param base: directory the link is relative to; the current working
        directory is used when it is empty.
    :return: ``(path, fragment)``; ``(None, None)`` when the link cannot
        be decoded or parsed, does not point at a local resource, or has
        an empty path.
    """
    from calibre.ebooks.html.input import Link
    if not isinstance(link_, unicode):
        try:
            # 'strict' is the real name of the raising error handler; the
            # former 'error' only worked because the resulting LookupError
            # was swallowed by a bare except.
            link_ = link_.decode('utf-8', 'strict')
        except Exception:
            self.log.warn('Failed to decode link %r. Ignoring'%link_)
            return None, None
    try:
        l = Link(link_, base if base else os.getcwdu())
    except Exception:
        self.log.exception('Failed to process link: %r'%link_)
        return None, None
    if l.path is None:
        # Not a local resource
        return None, None
    link = l.path.replace('/', os.sep).strip()
    frag = l.fragment
    if not link:
        return None, None
    return link, frag
|
||||
|
||||
def resource_adder(self, link_, base=None):
    """Link rewriting callback: add the linked local file to the book.

    Returns the manifest href of the resource (plus ``#fragment`` if the
    link had one).  The original ``link_`` is returned unchanged when it
    is not a readable local file or is a directory.  ``None`` is returned
    for links to plain text files, which are ignored.
    """
    local_path, fragment = self.link_to_local_path(link_, base=base)
    if local_path is None:
        return link_
    try:
        if base and not os.path.isabs(local_path):
            local_path = os.path.join(base, local_path)
        local_path = os.path.abspath(local_path)
    except:
        return link_
    if not os.access(local_path, os.R_OK):
        return link_
    if os.path.isdir(local_path):
        self.log.warn(link_, 'is a link to a directory. Ignoring.')
        return link_
    if not self.is_case_sensitive(tempfile.gettempdir()):
        local_path = local_path.lower()

    if local_path not in self.added_resources:
        bhref = os.path.basename(local_path)
        item_id, href = self.oeb.manifest.generate(id='added',
                href=bhref)
        guessed = self.guess_type(href)[0]
        media_type = guessed or self.BINARY_MIME
        if media_type == 'text/plain':
            self.log.warn('Ignoring link to text file %r'%link_)
            return None

        self.oeb.log.debug('Added', local_path)
        resource_dir = os.path.dirname(local_path)
        self.oeb.container = self.DirContainer(resource_dir,
                self.oeb.log, ignore_opf=True)
        # Load into memory
        item = self.oeb.manifest.add(item_id, href, media_type)
        # bhref refers to an already existing file. The read() method of
        # DirContainer will call unquote on it before trying to read the
        # file, therefore we quote it here.
        if isinstance(bhref, unicode):
            bhref = bhref.encode('utf-8')
        item.html_input_href = quote(bhref).decode('utf-8')
        if guessed in self.OEB_STYLES:
            item.override_css_fetch = partial(
                    self.css_import_handler, os.path.dirname(local_path))
        item.data
        self.added_resources[local_path] = href

    new_link = self.added_resources[local_path]
    if fragment:
        new_link = '#'.join((new_link, fragment))
    return new_link
|
||||
|
||||
def css_import_handler(self, base, href):
    """Fetch callback for CSS ``@import``: load a local stylesheet.

    :param base: directory the imported ``href`` is relative to.
    :param href: the imported stylesheet reference.
    :return: ``(None, text)`` with the preprocessed stylesheet text, or
        ``(None, None)`` if the target is not a readable local file or
        reading/preprocessing it fails (the failure is logged).
    """
    link, frag = self.link_to_local_path(href, base=base)
    if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
        return (None, None)
    try:
        # Context manager closes the handle even if decoding fails.
        with open(link, 'rb') as f:
            raw = f.read().decode('utf-8', 'replace')
        raw = self.oeb.css_preprocessor(raw, add_namespace=True)
    except Exception:
        self.log.exception('Failed to read CSS file: %r'%link)
        return (None, None)
    return (None, raw)
|
@ -4,22 +4,11 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, shutil
|
||||
|
||||
from calibre.utils import zipfile
|
||||
|
||||
from os.path import dirname, abspath, relpath, exists, basename
|
||||
|
||||
from lxml import etree
|
||||
from templite import Templite
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre import CurrentDir
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
from urllib import unquote
|
||||
|
||||
from calibre.ebooks.html.meta import EasyMeta
|
||||
|
||||
class HTMLOutput(OutputFormatPlugin):
|
||||
|
||||
@ -50,6 +39,9 @@ class HTMLOutput(OutputFormatPlugin):
|
||||
'''
|
||||
Generate table of contents
|
||||
'''
|
||||
from lxml import etree
|
||||
from urllib import unquote
|
||||
|
||||
from calibre.ebooks.oeb.base import element
|
||||
with CurrentDir(output_dir):
|
||||
def build_node(current_node, parent=None):
|
||||
@ -72,11 +64,18 @@ class HTMLOutput(OutputFormatPlugin):
|
||||
return wrap
|
||||
|
||||
def generate_html_toc(self, oeb_book, ref_url, output_dir):
    """Serialize the tree built by ``generate_toc()`` to UTF-8 markup.

    The result is pretty-printed and carries no XML declaration.
    """
    from lxml import etree

    toc_root = self.generate_toc(oeb_book, ref_url, output_dir)
    serialize_args = dict(pretty_print=True, encoding='utf-8',
            xml_declaration=False)
    return etree.tostring(toc_root, **serialize_args)
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from lxml import etree
|
||||
from calibre.utils import zipfile
|
||||
from templite import Templite
|
||||
from urllib import unquote
|
||||
from calibre.ebooks.html.meta import EasyMeta
|
||||
|
||||
# read template files
|
||||
if opts.template_html_index is not None:
|
||||
@ -192,7 +191,7 @@ class HTMLOutput(OutputFormatPlugin):
|
||||
f.write(t)
|
||||
item.unload_data_from_memory(memory=path)
|
||||
|
||||
zfile = ZipFile(output_path, "w")
|
||||
zfile = zipfile.ZipFile(output_path, "w")
|
||||
zfile.add_dir(output_dir, basename(output_dir))
|
||||
zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)
|
||||
|
@ -10,9 +10,6 @@ import os
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
class HTMLZInput(InputFormatPlugin):
|
||||
|
||||
@ -23,6 +20,10 @@ class HTMLZInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
self.log = log
|
||||
html = u''
|
||||
top_levels = []
|
@ -9,13 +9,10 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
class HTMLZOutput(OutputFormatPlugin):
|
||||
|
||||
@ -43,7 +40,10 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
|
||||
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
# HTML
|
||||
if opts.htmlz_css_type == 'inline':
|
||||
@ -81,7 +81,7 @@ class HTMLZOutput(OutputFormatPlugin):
|
||||
fname = os.path.join(tdir, 'images', images[item.href])
|
||||
with open(fname, 'wb') as img:
|
||||
img.write(data)
|
||||
|
||||
|
||||
# Cover
|
||||
cover_path = None
|
||||
try:
|
87
src/calibre/ebooks/conversion/plugins/lrf_input.py
Normal file
87
src/calibre/ebooks/conversion/plugins/lrf_input.py
Normal file
@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, sys
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class LRFInput(InputFormatPlugin):

    # Plugin metadata.
    name        = 'LRF Input'
    author      = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types  = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Convert an LRF stream and write the result to ``content.opf``.

        The LRF document is dumped to XML, parsed with lxml (retrying in
        recovery mode if strict parsing fails) and transformed with the
        ``templates/lrf.xsl`` stylesheet using calibre extension objects.

        :return: absolute path of the written ``content.opf``.
        """
        from lxml import etree
        from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock,
                Canvas, ImageBlock, RuledLine)
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            # Debug dump of the intermediate XML; close the handle promptly.
            with open('lrs.xml', 'wb') as dump:
                dump.write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except Exception:
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                    recover=True)
            doc = etree.fromstring(xml, parser=parser)

        # Map CharButton targets to 'page.xhtml#object' hrefs.
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))
        # Map Plot references to the file of the image stream they show.
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                    image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles')     : styles,
                ('calibre', 'canvas')     : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            # Retry with a deeper stack, but never lower a limit that is
            # already above 5000.
            sys.setrecursionlimit(max(5000, sys.getrecursionlimit()))
            result = transform(doc)

        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')
|
25
src/calibre/ebooks/conversion/plugins/odt_input.py
Normal file
25
src/calibre/ebooks/conversion/plugins/odt_input.py
Normal file
@ -0,0 +1,25 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Convert an ODT file into a Open Ebook
|
||||
'''
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class ODTInput(InputFormatPlugin):

    # Plugin metadata.
    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types  = set(['odt'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Run the ODT extractor on ``stream``, targeting the current
        directory, and return whatever the extractor returns."""
        # Imported here rather than at module level.
        from calibre.ebooks.odt.input import Extract
        extractor = Extract()
        return extractor(stream, '.', log)
|
||||
|
||||
|
@ -5,13 +5,10 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.customize.conversion import (OutputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre import CurrentDir
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
from urllib import unquote
|
||||
|
||||
class OEBOutput(OutputFormatPlugin):
|
||||
|
||||
@ -23,6 +20,9 @@ class OEBOutput(OutputFormatPlugin):
|
||||
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
from urllib import unquote
|
||||
from lxml import etree
|
||||
|
||||
self.log, self.opts = log, opts
|
||||
if not os.path.exists(output_path):
|
||||
os.makedirs(output_path)
|
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
|
||||
|
||||
class PDBInput(InputFormatPlugin):
|
||||
|
||||
@ -19,6 +17,9 @@ class PDBInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
|
||||
|
||||
header = PdbHeaderReader(stream)
|
||||
Reader = get_reader(header.ident)
|
||||
|
@ -8,7 +8,7 @@ import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.pdb import PDBError, get_writer, FORMAT_WRITERS
|
||||
from calibre.ebooks.pdb import PDBError, get_writer, ALL_FORMAT_WRITERS
|
||||
|
||||
class PDBOutput(OutputFormatPlugin):
|
||||
|
||||
@ -19,9 +19,9 @@ class PDBOutput(OutputFormatPlugin):
|
||||
options = set([
|
||||
OptionRecommendation(name='format', recommended_value='doc',
|
||||
level=OptionRecommendation.LOW,
|
||||
short_switch='f', choices=FORMAT_WRITERS.keys(),
|
||||
short_switch='f', choices=list(ALL_FORMAT_WRITERS),
|
||||
help=(_('Format to use inside the pdb container. Choices are:')+\
|
||||
' %s' % FORMAT_WRITERS.keys())),
|
||||
' %s' % list(ALL_FORMAT_WRITERS))),
|
||||
OptionRecommendation(name='pdb_output_encoding', recommended_value='cp1252',
|
||||
level=OptionRecommendation.LOW,
|
||||
help=_('Specify the character encoding of the output document. ' \
|
@ -7,10 +7,6 @@ __docformat__ = 'restructuredtext en'
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.pdf.pdftohtml import pdftohtml
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.constants import plugins
|
||||
pdfreflow, pdfreflow_err = plugins['pdfreflow']
|
||||
|
||||
class PDFInput(InputFormatPlugin):
|
||||
|
||||
@ -31,6 +27,9 @@ class PDFInput(InputFormatPlugin):
|
||||
])
|
||||
|
||||
def convert_new(self, stream, accelerators):
|
||||
from calibre.constants import plugins
|
||||
pdfreflow, pdfreflow_err = plugins['pdfreflow']
|
||||
|
||||
from calibre.ebooks.pdf.reflow import PDFDocument
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
if pdfreflow_err:
|
||||
@ -43,6 +42,9 @@ class PDFInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.pdf.pdftohtml import pdftohtml
|
||||
|
||||
log.debug('Converting file to html...')
|
||||
# The main html file will be named index.html
|
||||
self.opts, self.log = options, log
|
@ -13,10 +13,50 @@ import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.pdf.pageoptions import UNITS, PAPER_SIZES, \
|
||||
ORIENTATIONS
|
||||
|
||||
# Values accepted by the 'unit' option.
UNITS = ['millimeter', 'point', 'inch', 'pica', 'didot', 'cicero',
         'devicepixel']

# Values accepted by the 'paper_size' option (order is preserved because
# the list is shown verbatim in the option's help text).
PAPER_SIZES = [
    'b2', 'a9', 'executive', 'tabloid', 'b4', 'b5', 'b6', 'b7', 'b0', 'b1',
    'letter', 'b3', 'a7', 'a8', 'b8', 'b9', 'a3', 'a1', 'folio', 'c5e',
    'dle', 'a0', 'ledger', 'legal', 'a6', 'a2', 'b10', 'a5', 'comm10e', 'a4',
]

# Values accepted by the 'orientation' option.
ORIENTATIONS = ['portrait', 'landscape']
|
||||
|
||||
class PDFOutput(OutputFormatPlugin):
|
||||
|
||||
@ -26,23 +66,23 @@ class PDFOutput(OutputFormatPlugin):
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='unit', recommended_value='inch',
|
||||
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(),
|
||||
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS,
|
||||
help=_('The unit of measure. Default is inch. Choices '
|
||||
'are %s '
|
||||
'Note: This does not override the unit for margins!') % UNITS.keys()),
|
||||
'Note: This does not override the unit for margins!') % UNITS),
|
||||
OptionRecommendation(name='paper_size', recommended_value='letter',
|
||||
level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(),
|
||||
level=OptionRecommendation.LOW, choices=PAPER_SIZES,
|
||||
help=_('The size of the paper. This size will be overridden when a '
|
||||
'non default output profile is used. Default is letter. Choices '
|
||||
'are %s') % PAPER_SIZES.keys()),
|
||||
'are %s') % PAPER_SIZES),
|
||||
OptionRecommendation(name='custom_size', recommended_value=None,
|
||||
help=_('Custom size of the document. Use the form widthxheight '
|
||||
'EG. `123x321` to specify the width and height. '
|
||||
'This overrides any specified paper-size.')),
|
||||
OptionRecommendation(name='orientation', recommended_value='portrait',
|
||||
level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(),
|
||||
level=OptionRecommendation.LOW, choices=ORIENTATIONS,
|
||||
help=_('The orientation of the page. Default is portrait. Choices '
|
||||
'are %s') % ORIENTATIONS.keys()),
|
||||
'are %s') % ORIENTATIONS),
|
||||
OptionRecommendation(name='preserve_cover_aspect_ratio',
|
||||
recommended_value=False,
|
||||
help=_('Preserve the aspect ratio of the cover, instead'
|
||||
@ -105,6 +145,8 @@ class PDFOutput(OutputFormatPlugin):
|
||||
|
||||
def convert_text(self, oeb_book):
|
||||
from calibre.ebooks.pdf.writer import PDFWriter
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
|
||||
self.log.debug('Serializing oeb input to disk for processing...')
|
||||
self.get_cover_data()
|
||||
|
@ -11,9 +11,6 @@ import shutil
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
|
||||
class PMLInput(InputFormatPlugin):
|
||||
|
||||
@ -24,6 +21,8 @@ class PMLInput(InputFormatPlugin):
|
||||
file_types = set(['pml', 'pmlz'])
|
||||
|
||||
def process_pml(self, pml_path, html_path, close_all=False):
|
||||
from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
|
||||
|
||||
pclose = False
|
||||
hclose = False
|
||||
|
||||
@ -85,6 +84,9 @@ class PMLInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
|
||||
self.options = options
|
||||
self.log = log
|
||||
pages, images = [], []
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user