This commit is contained in:
Sengian 2011-11-05 13:15:24 +01:00
commit 664bb97146
220 changed files with 48204 additions and 39168 deletions

View File

@ -19,6 +19,114 @@
# new recipes:
#   - title:
- version: 0.8.24
date: 2011-10-27
new features:
- title: "Kobo: Add support for fetching annotations from the kobo reader."
description: "Right click the send to device button in calibre with your kobo connected and choose fetch annotations. The annotations are placed into the comments of the corresponding books in the calibre library. This feature is still experimental."
type: major
- title: "Preserve the set of selected books in the library view when a device is connected, fixing a long standing annoyance"
bug fixes:
- title: "Prevent changing of device metadata management option while a device is connected."
tickets: [874118]
- title: "Book details panel: Show tooltip only when hovering over cover, not the rest of the book information, as it makes it hard to read."
tickets: [876454]
- title: "MOBI Output: Fix use of list elements as link anchors caused links to always point to start of list."
tickets: [879391]
- title: "RB Output: Fix calibre generated rb files not being opened by the RocketBook."
tickets: [880930]
- title: "FB2 Input: Don't choke on FB2 files that have empty embedded content tags."
tickets: [880904]
- title: "ODT Input: CSS rationalization should not fail with non ascii class names"
- title: "Fix creating new library using the copy structure option incorrectly setting all text type columns to be like the tags column"
- title: "E-book viewer: Don't choke on windows installs with a non UTF-8 filesystem encoding."
tickets: [879740]
improved recipes:
- Novaya Gazeta
- El Universal (Venezuela)
- The Australian (subscription enabled)
- Metro NL
- The Scotsman
- Japan Times
new recipes:
- title: Silicon Republic
author: Neil Grogan
- title: Calibre Blog
author: Krittika Goyal
- version: 0.8.23
date: 2011-10-21
new features:
- title: "Drivers for T-Mobile Move, new Pandigital Novel, New Onyx Boox and Freescale MX 515"
- title: "SONY T1 driver: Support for periodicals and better timezone detection"
- title: "Add a remove cover entry to the right click menu of the cover display in the right panel"
tickets: [874689]
bug fixes:
- title: "Amazon metadata download: Fix for change in Amazon website that broke downloading metadata."
tickets: [878395]
- title: "MOBI metadata: When reading titles from MOBI files only use the title in the PDB header if there is no long title in the EXTH header"
tickets: [ 875243 ]
- title: "Fix regression that broke use of complex custom columns in save to disk templates."
tickets: [877366]
- title: "Fix regression that broke reading metadata from CHM files"
- title: "Fix a bug that broke conversion of some zipped up HTML files with non ascii filenames on certain windows installs."
tickets: [873288]
- title: "RTF Input: Fix bug in handling of paragraph separators."
tickets: [863735]
- title: "Fix a regression that broke downloading certain periodicals for the Kindle."
tickets: [875595]
- title: "Fix regression that broke updating of covers inside ebook files when saving to disk"
- title: "Fix regression breaking editing the 'show in tag browser' checkbox in custom column setup editing"
- title: "Fix typo that broke stopping selected jobs in 0.8.22"
improved recipes:
- Columbus Dispatch
- Ming Pao
- La Republica
- Korea Times
- USA Today
- CNN
- Liberation
- El Pais
- Helsingin Sanomat
new recipes:
- title: Kyugyhang, Hankyoreh and Hankyoreh21
author: Seongkyoun Yoo.
- title: English Katherimini
author: Thomas Scholl
- title: Various French news sources
author: Aurelien Chabot.
- version: 0.8.22
  date: 2011-10-14

70
recipes/20minutes.recipe Normal file
View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
20minutes.fr
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Minutes(BasicNewsRecipe):
    """Recipe for the French free daily '20 minutes' (20minutes.fr)."""

    title = '20 minutes'
    __author__ = 'calibre'
    description = 'Actualités'
    publisher = '20minutes.fr'
    category = 'Actualités, France, Monde'
    language = 'fr'
    encoding = 'cp1252'
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    use_embedded_content = False
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # Uniform sans-serif styling for headline, byline, captions and body text.
    extra_css = '''
    h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
    .mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
    .mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
    .mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
    '''

    # The article proper lives inside div#mn-article; everything outside it
    # (navigation, ads, comment boxes, tool bars) is dropped.
    keep_only_tags = [dict(id='mn-article')]

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['mn-section-heading']}),
        dict(name='a', attrs={'href': ['#commentaires']}),
    ] + [
        # Assorted chrome divs identified by their CSS class.
        dict(name='div', attrs={'class': [cls]})
        for cls in ('mn-right', 'mna-box', 'mna-comment-call',
                    'mna-tools', 'mn-trilist')
    ]

    remove_tags_after = dict(name='div', attrs={'class': ['mna-body', 'mna-signature']})

    # (feed title, RSS slug) pairs; every feed lives at /rss/<slug>.xml.
    _SECTIONS = [
        ('France', 'actu-france'),
        ('International', 'monde'),
        ('Tech/Web', 'hightech'),
        ('Sciences', 'sciences'),
        ('Economie', 'economie'),
        ('Politique', 'politique'),
        (u'Médias', 'media'),
        ('Cinema', 'cinema'),
        ('People', 'people'),
        ('Culture', 'culture'),
        ('Sport', 'sport'),
        ('Paris', 'paris'),
        ('Lyon', 'lyon'),
        ('Toulouse', 'toulouse'),
    ]
    feeds = [(section, 'http://www.20minutes.fr/rss/%s.xml' % slug)
             for section, slug in _SECTIONS]

    def preprocess_html(self, soup):
        """Strip inline style attributes so extra_css rules take effect."""
        for tag in soup.findAll(style=True):
            del tag['style']
        return soup

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
b365.realitatea.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class b365Realitatea(BasicNewsRecipe):
    """Download news from b365.realitatea.net (Bucharest local news, Romanian)."""

    title = u'b365 Realitatea'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'b365 Realitatea'
    description = u'b365 Realitatea'
    oldest_article = 5  # days
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Romania,Bucuresti'
    encoding = 'utf-8'
    cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'

    # Propagate the metadata above into the generated ebook.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher
    }

    # The article body lives inside div.newsArticle.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'newsArticle'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'date'}),
        # BUG FIX: tag name was misspelled 'dic', so the AddThis share
        # toolbox (a <div>) was never actually removed from articles.
        dict(name='div', attrs={'class': 'addthis_toolbox addthis_default_style'}),
        dict(name='div', attrs={'class': 'related_posts'}),
        dict(name='div', attrs={'id': 'RelevantiWidget'})
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': 'RelevantiWidget'})
    ]

    feeds = [
        (u'\u0218tiri', u'http://b365.realitatea.net/rss-full/')
    ]

    def preprocess_html(self, soup):
        """Normalise <img> tags for Adobe Digital Editions compatibility."""
        return self.adeify_images(soup)

View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CalibreBlog(BasicNewsRecipe):
    """Fetch posts from the official calibre blog."""

    title = u'Calibre Blog'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1000  # days — effectively the whole archive
    max_articles_per_feed = 5
    use_embedded_content = False
    no_stylesheets = True
    # Let calibre's readability heuristics extract the article body.
    auto_cleanup = True

    feeds = [
        ('Article', 'http://blog.calibre-ebook.com/feeds/posts/default'),
    ]

35
recipes/capital_gr.recipe Normal file
View File

@ -0,0 +1,35 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class Capital(BasicNewsRecipe):
    """Fetch financial news from Capital.gr (Greek business news portal)."""
    title = 'Capital.gr'
    __author__ ='Stelios'
    description = 'Financial News from Greece'
    #max_articles_per_feed = 100
    oldest_article = 3
    publisher = 'Capital.gr'
    category = 'news, GR'
    language = 'el'
    # Site serves Greek content as windows-1253 (see note on utf8 below).
    encoding = 'windows-1253'
    cover_url = 'http://files.capital.gr/images/caplogo.gif'
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    # Keep only the headline, paragraphs and the main article body span.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='p'),
        dict(name='span', attrs={'id' : ["textbody"]})
    ]
    #3 posts seemed to have utf8 encoding
    # Feed titles below are Greek, written as unicode escapes
    # (e.g. "ALL THE NEWS", "COMPANIES", "MARKETS", "ECONOMY", ...).
    feeds = [
        (u'\u039F\u039B\u0395\u03A3 \u039F\u0399 \u0395\u0399\u0394\u0397\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-1'),
        (u'\u0395\u03A0\u0399\u03A7\u0395\u0399\u03A1\u0397\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-2'),
        (u'\u0391\u0393\u039F\u03A1\u0395\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-3'),
        (u'\u039F\u0399\u039A\u039F\u039D\u039F\u039C\u0399\u0391', 'http://www.capital.gr/news/newsrss.asp?s=-4'),
        (u'\u03A7\u03A1\u0397\u039C. \u0391\u039D\u0391\u039A\u039F\u0399\u039D\u03A9\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-6'),
        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u039C\u0395 \u0391\u03A0\u039F\u03A8\u0397', 'http://www.capital.gr/articles/articlesrss.asp?catid=4'),
        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A3\u0399\u03A9\u03A0\u0397\u03A4\u0397\u03A1\u0399\u039F', 'http://www.capital.gr/articles/articlesrss.asp?catid=6'),
        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3', 'http://www.capital.gr/articles/articlesrss.asp?catid=8'),
        #(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A4\u0395\u03A7\u039D\u039F\u039B\u039F\u0393\u0399\u0391', 'http://www.capital.gr/news/newsrss.asp?s=-8') not working for now
    ]

51
recipes/catavencii.recipe Normal file
View File

@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
catavencii.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Catavencii(BasicNewsRecipe):
    """Download news from catavencii.ro (Romanian satirical weekly)."""
    title = u'Ca\u0163avencii'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Ca\u0163avencii'
    description = u'Ca\u0163avencii'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Romania'
    encoding = 'utf-8'
    cover_url = 'http://www.simonatache.ro/wp-content/uploads/2011/06/catavencii-logo.png'
    # Propagate the metadata above into the generated ebook.
    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }
    # The article text lives inside div#content.
    keep_only_tags = [
        dict(name='div', attrs={'id':'content'})
    ]
    # Strip navigation breadcrumbs, the article info line and share buttons.
    remove_tags = [
        dict(name='div', attrs={'id':'breadcrumbs'})
        , dict(name='span', attrs={'class':'info'})
        , dict(name='div', attrs={'id':'social-media-article'})
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id':'social-media-article'})
    ]
    feeds = [
        (u'\u0218tiri', u'http://www.catavencii.ro/rss')
    ]

    def preprocess_html(self, soup):
        # Normalise <img> tags for Adobe Digital Editions compatibility.
        return self.adeify_images(soup)

View File

@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe):
#match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html'] #match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
max_articles_per_feed = 25 max_articles_per_feed = 25
extra_css = '''
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
.cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
'''
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''), (re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''), (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe):
remove_tags = [ remove_tags = [
{'class':['cnn_strybtntools', 'cnn_strylftcntnt', {'class':['cnn_strybtntools', 'cnn_strylftcntnt',
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt', 'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
'cnn_strycntntrgt', 'hed_side', 'foot']}, 'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']},
{'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none',
'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']},
{'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev',
'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']},
{'style':['display:none']},
dict(id=['ie_column']), dict(id=['ie_column']),
] ]
@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe):
ans = BasicNewsRecipe.get_article_url(self, article) ans = BasicNewsRecipe.get_article_url(self, article)
return ans.partition('?')[0] return ans.partition('?')[0]
def get_masthead_url(self):
masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead

View File

@ -14,67 +14,43 @@ class ColumbusDispatchRecipe(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True remove_empty_feeds = True
oldest_article = 1.2 oldest_article = 1.2
max_articles_per_feed = 100 use_embedded_content = False
no_stylesheets = True no_stylesheets = True
remove_javascript = True auto_cleanup = True
encoding = 'utf-8' #auto_cleanup_keep = '//div[@id="story-photos"]'
# Seems to work best, but YMMV
simultaneous_downloads = 2
# Feeds from http://www.dispatch.com/live/content/rss/index.html # Feeds from http://www.dispatch.com/live/content/rss/index.html
feeds = [] feeds = [
feeds.append((u'News: Local and state news', u'http://www.dispatch.com/live/static/crt/2_rss_localnews.xml')) ('Local',
feeds.append((u'News: National news', u'http://www.dispatch.com/live/static/crt/2_rss_nationalnews.xml')) 'http://www.dispatch.com/content/syndication/news_local-state.xml'),
feeds.append((u'News: Editorials', u'http://www.dispatch.com/live/static/crt/2_rss_editorials.xml')) ('National',
feeds.append((u'News: Columnists', u'http://www.dispatch.com/live/static/crt/2_rss_columnists.xml')) 'http://www.dispatch.com/content/syndication/news_national.xml'),
feeds.append((u'News: Health news', u'http://www.dispatch.com/live/static/crt/2_rss_health.xml')) ('Business',
feeds.append((u'News: Science news', u'http://www.dispatch.com/live/static/crt/2_rss_science.xml')) 'http://www.dispatch.com/content/syndication/news_business.xml'),
feeds.append((u'Sports: OSU football', u'http://www.dispatch.com/live/static/crt/2_rss_osufootball.xml')) ('Editorials',
feeds.append((u'Sports: OSU men\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osumensbball.xml')) 'http://www.dispatch.com/content/syndication/opinion_editorials.xml'),
feeds.append((u'Sports: OSU women\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osuwomensbball.xml')) ('Columnists',
feeds.append((u'Sports: OSU sports', u'http://www.dispatch.com/live/static/crt/2_rss_osusports.xml')) 'http://www.dispatch.com/content/syndication/opinion_columns.xml'),
feeds.append((u'Sports: Blue Jackets', u'http://www.dispatch.com/live/static/crt/2_rss_bluejackets.xml')) ('Life and Arts',
feeds.append((u'Sports: Crew', u'http://www.dispatch.com/live/static/crt/2_rss_crew.xml')) 'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'),
feeds.append((u'Sports: Clippers', u'http://www.dispatch.com/live/static/crt/2_rss_clippers.xml')) ('OSU Sports',
feeds.append((u'Sports: Indians', u'http://www.dispatch.com/live/static/crt/2_rss_indians.xml')) 'http://www.dispatch.com/content/syndication/sports_osu.xml'),
feeds.append((u'Sports: Reds', u'http://www.dispatch.com/live/static/crt/2_rss_reds.xml')) ('Auto Racing',
feeds.append((u'Sports: Golf', u'http://www.dispatch.com/live/static/crt/2_rss_golf.xml')) 'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'),
feeds.append((u'Sports: Outdoors', u'http://www.dispatch.com/live/static/crt/2_rss_outdoors.xml')) ('Outdoors',
feeds.append((u'Sports: Cavs/NBA', u'http://www.dispatch.com/live/static/crt/2_rss_cavaliers.xml')) 'http://www.dispatch.com/content/syndication/sports_outdoors.xml'),
feeds.append((u'Sports: High Schools', u'http://www.dispatch.com/live/static/crt/2_rss_highschools.xml')) ('Bengals',
feeds.append((u'Sports: Browns', u'http://www.dispatch.com/live/static/crt/2_rss_browns.xml')) 'http://www.dispatch.com/content/syndication/sports_bengals.xml'),
feeds.append((u'Sports: Bengals', u'http://www.dispatch.com/live/static/crt/2_rss_bengals.xml')) ('Indians',
feeds.append((u'Sports: Auto Racing', u'http://www.dispatch.com/live/static/crt/2_rss_autoracing.xml')) 'http://www.dispatch.com/content/syndication/sports_indians.xml'),
feeds.append((u'Business News', u'http://www.dispatch.com/live/static/crt/2_rss_business.xml')) ('Clippers',
feeds.append((u'Features: Weekender', u'http://www.dispatch.com/live/static/crt/2_rss_weekender.xml')) 'http://www.dispatch.com/content/syndication/sports_clippers.xml'),
feeds.append((u'Features: Life and Arts', u'http://www.dispatch.com/live/static/crt/2_rss_lifearts.xml')) ('Crew',
feeds.append((u'Features: Food', u'http://www.dispatch.com/live/static/crt/2_rss_food.xml')) 'http://www.dispatch.com/content/syndication/sports_crew.xml'),
feeds.append((u'Features: NOW! for kids', u'http://www.dispatch.com/live/static/crt/2_rss_now.xml')) ('Reds',
feeds.append((u'Features: Travel', u'http://www.dispatch.com/live/static/crt/2_rss_travel.xml')) 'http://www.dispatch.com/content/syndication/sports_reds.xml'),
feeds.append((u'Features: Home and Garden', u'http://www.dispatch.com/live/static/crt/2_rss_homegarden.xml')) ('Blue Jackets',
feeds.append((u'Features: Faith and Values', u'http://www.dispatch.com/live/static/crt/2_rss_faithvalues.xml')) 'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'),
#feeds.append((u'', u'')) ]
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'colhed'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'hed'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subhed'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'date'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'byline'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'srcline'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'body'}))
remove_tags = []
remove_tags.append(dict(name = 'div', attrs = {'id': 'middle-story-ad-container'}))
extra_css = '''
body {font-family:verdana,arial,helvetica,geneva,sans-serif ;}
a {text-decoration: none; color: blue;}
div.colhed {font-weight: bold;}
div.hed {font-size: xx-large; font-weight: bold; margin-bottom: 0.2em;}
div.subhed {font-size: large;}
div.date {font-size: x-small; font-style: italic; color: #666666; margin-top: 0.4em; margin-bottom: 0.4em;}
div.byline, div.srcline {font-size: small; color: #696969;}
'''

View File

@ -1,10 +1,11 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1306061239(BasicNewsRecipe): class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'The Daily Mirror' title = u'The Daily Mirror'
description = 'News as provide by The Daily Mirror -UK' description = 'News as provide by The Daily Mirror -UK'
__author__ = 'Dave Asbury' __author__ = 'Dave Asbury'
# last updated 30/10/11
language = 'en_GB' language = 'en_GB'
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg' cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@ -12,26 +13,30 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif' masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
oldest_article = 1 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 30
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), dict(name='div',attrs={'id' : 'body-content'})
dict(attrs={'class':['article-attr']}), ]
dict(name='div', attrs={'class' : [ 'article-body', 'crosshead']})
remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class' : ['caption', 'article-resize']}), dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
dict( attrs={'class':'append-html'}) dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
] dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
]
preprocess_regexps = [
(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
feeds = [ feeds = [
@ -43,10 +48,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml') ,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml') ,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
,(u'Sport','http://www.mirror.co.uk/sport/rss.xml') ,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml') ,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
,(u'Advice','http://www.mirror.co.uk/advice/rss.xml') ,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml') ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml') # example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
]
]

View File

@ -16,7 +16,7 @@ class DeutscheWelle_es(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
language = 'de_ES' language = 'de'
publication_type = 'newsportal' publication_type = 'newsportal'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif'

View File

@ -0,0 +1,58 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
class Ekathimerini(BasicNewsRecipe):
    """English edition of Kathimerini.

    The site exposes one combined 'latest news' RSS dump; articles are
    grouped into feeds by their <subcat> element.
    """

    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    encoding = 'windows-1253'
    conversion_options = {'linearize_tables': True}
    no_stylesheets = True
    delay = 1  # seconds between fetches, be polite to the server
    keep_only_tags = [dict(name='td', attrs={'class': 'news'})]
    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        """Yield article dicts for every <item> whose subcategory matches."""
        for item in idx.findAll('item'):
            subcat = u''
            subcat_tag = item.find('subcat')
            if subcat_tag:
                subcat = self.tag_to_string(subcat_tag)
            if subcat != category:
                continue
            # Descriptions arrive HTML-encoded; run them through
            # BeautifulSoup once to decode entities into plain text.
            raw_desc = self.tag_to_string(item.find('description'))
            yield {
                'title': self.tag_to_string(item.find('title')),
                'url': self.tag_to_string(item.find('link')),
                'description': self.tag_to_string(BeautifulSoup(raw_desc)),
                'date': self.tag_to_string(item.find('pubdate')),
            }

    def parse_index(self):
        """Build the feed list: uncategorised items first, then one feed per subcategory."""
        raw = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(raw, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        names = sorted(set(self.tag_to_string(tag) for tag in idx.findAll('subcat')))
        feeds = [(u'News', list(self.find_articles(idx, u'')))]
        for name in names:
            feeds.append((name.capitalize(), list(self.find_articles(idx, name))))
        return feeds

    def print_version(self, url):
        # The printable page lives under a doubled 4Dcgi/4dcgi path.
        return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')

View File

@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})] keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})]
extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} ' extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '

View File

@ -56,6 +56,7 @@ class ElUniversal(BasicNewsRecipe):
] ]
def print_version(self, url): def print_version(self, url):
rp,sep,rest = url.rpartition('/') return url + '-imp'
return rp + sep + 'imp_' + rest
def get_article_url(self, article):
return article.get('guid', None)

10
recipes/frandroid.recipe Normal file
View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318572550(BasicNewsRecipe):
    """FrAndroid — French Android news, fetched via its Feedburner feed."""

    title = u'FrAndroid'
    oldest_article = 2  # days
    max_articles_per_feed = 100
    # Rely on calibre's readability heuristics to extract the article body.
    auto_cleanup = True

    feeds = [
        (u'FrAndroid', u'http://feeds.feedburner.com/Frandroid'),
    ]

View File

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318572445(BasicNewsRecipe):
    """Google Mobile Blog, fetched from its Blogspot Atom feed."""

    title = u'Google Mobile Blog'
    oldest_article = 7  # days
    max_articles_per_feed = 100
    # Rely on calibre's readability heuristics to extract the article body.
    auto_cleanup = True

    feeds = [
        (u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml'),
    ]

47
recipes/hankyoreh.recipe Normal file
View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh(BasicNewsRecipe):
    """Download news articles from The Hankyoreh (Korean daily)."""
    title = u'Hankyoreh'
    language = 'ko'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 5
    # Follow one level of links from each article page.
    recursions = 1
    max_articles_per_feed = 5
    no_stylesheets = True
    # The article lives in a 60px-high table row plus the resizable-font area.
    # NOTE(review): these selectors are tied to the 2011 page layout — verify
    # against the live site before changing.
    keep_only_tags = [
        dict(name='tr', attrs={'height':['60px']}),
        dict(id=['fontSzArea'])
    ]
    remove_tags = [
        dict(target='_blank'),
        dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width':['590']}),
    ]
    remove_tags_after = [
        dict(target='_top')
    ]
    feeds = [
        ('All News','http://www.hani.co.kr/rss/'),
        ('Politics','http://www.hani.co.kr/rss/politics/'),
        ('Economy','http://www.hani.co.kr/rss/economy/'),
        ('Society','http://www.hani.co.kr/rss/society/'),
        ('International','http://www.hani.co.kr/rss/international/'),
        ('Culture','http://www.hani.co.kr/rss/culture/'),
        ('Sports','http://www.hani.co.kr/rss/sports/'),
        ('Science','http://www.hani.co.kr/rss/science/'),
        ('Opinion','http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon','http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition','http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection','http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly','http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly','http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani','http://www.hani.co.kr/rss/multihani/'),
        ('Lead','http://www.hani.co.kr/rss/lead/'),
        ('Newsrank','http://www.hani.co.kr/rss/newsrank/'),
    ]

View File

@ -0,0 +1,25 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hankyoreh21(BasicNewsRecipe):
    """Download articles from Hankyoreh21, The Hankyoreh's weekly magazine."""

    title = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 20  # days; the magazine is a weekly
    # Follow one level of links from each article page.
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets = True
    remove_javascript = True
    # Article body font block plus the resizable-font area.
    keep_only_tags = [
        dict(name='font', attrs={'class':'t18bk'}),
        dict(id=['fontSzArea'])
    ]
    feeds = [
        # BUG FIX: the feed URL had a trailing space
        # ('http://h21.hani.co.kr/rss/ '), which breaks the feed fetch.
        ('Hani21', 'http://h21.hani.co.kr/rss/'),
    ]

Binary file not shown.

After

Width:  |  Height:  |  Size: 323 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 600 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 712 B

34
recipes/in_gr.recipe Normal file
View File

@ -0,0 +1,34 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class ingr(BasicNewsRecipe):
    """Fetch general news from in.gr (Greek news portal)."""

    title = 'in.gr'
    __author__ = 'Stelios'
    description = 'News from Greece'
    # max_articles_per_feed = 100
    oldest_article = 4  # days
    publisher = 'in.gr'
    category = 'news, GR'
    language = 'el'
    # FIX: encoding was assigned twice ('utf8' both times); the redundant
    # duplicate assignment has been removed.
    encoding = 'utf8'
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    # Keep the headline and the main article container.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'id' : ['in-news-article']})
    ]
    # Strip credits, tool bars, ad banners and the audio player chrome.
    remove_tags = [
        dict(name='em', attrs={'class' : ['credits']}),
        dict(name='div', attrs={'class' : ['article-tools-hor', 'promo-banners gAds', 'main', 'article-listen-player', 'article-tools-hor-bttm', 'tools-sec', 'article-tools', 'article-listen-player-ver']})
    ]
    # Feed titles are Greek, written as unicode escapes
    # ("Greece", "News", "World", "Science", "Culture").
    feeds = [
        (u'\u0395\u03BB\u03BB\u03AC\u03B4\u03B1', 'http://rss.in.gr/feed/news/greece'),
        (u'\u0395\u03B9\u03B4\u03AE\u03C3\u03B5\u03B9\u03C2', 'http://rss.in.gr/feed/news'),
        (u'\u039A\u03CC\u03C3\u03BC\u03BF\u03C2', 'http://rss.in.gr/feed/news/world'),
        (u'\u0395\u03C0\u03B9\u03C3\u03C4\u03AE\u03BC\u03B7', 'http://rss.in.gr/feed/news/science'),
        (u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://rss.in.gr/feed/news/culture')
    ]

View File

@ -1,86 +1,119 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.independent.co.uk
'''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class TheIndependent(BasicNewsRecipe): class AdvancedUserRecipe1320474488(BasicNewsRecipe):
title = 'The Independent'
__author__ = 'Darko Miletic'
description = 'Independent News - Breaking news, comment and features from The Independent newspaper'
publisher = 'The Independent'
category = 'news, politics, UK'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'en_GB'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.independent.co.uk/independent.co.uk/images/logo-london.png'
extra_css = """
h1{font-family: Georgia,serif }
body{font-family: Verdana,Arial,Helvetica,sans-serif}
img{margin-bottom: 0.4em; display:block}
.info,.caption,.credits{font-size: x-small}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags =[
dict(name=['meta','link','object','embed','iframe','base','style'])
,dict(attrs={'class':['related-articles','share','googleCols','article-tools','paging','googleArt']})
,dict(attrs={'id':['newsVideoPlayer','yahoobook','google-intext']})
]
keep_only_tags =[dict(attrs={'id':'article'})]
remove_attributes=['lang','onclick','width','xmlns:fb']
title = u'The Independent'
oldest_article = 1
max_articles_per_feed = 100
auto_cleanup = True
language = 'en_GB'
__author__ = 'NotTaken'
feeds = [ feeds = [
(u'UK' , u'http://www.independent.co.uk/news/uk/rss' ) (u'News - UK',
,(u'World' , u'http://www.independent.co.uk/news/world/rss' ) u'http://www.independent.co.uk/news/uk/?service=rss'),
,(u'Business' , u'http://www.independent.co.uk/news/business/rss' ) (u'News - World',
,(u'People' , u'http://www.independent.co.uk/news/people/rss' ) u'http://www.independent.co.uk/news/world/?service=rss'),
,(u'Science' , u'http://www.independent.co.uk/news/science/rss' ) (u'News - Business',
,(u'Media' , u'http://www.independent.co.uk/news/media/rss' ) u'http://www.independent.co.uk/news/business/?service=rss'),
,(u'Education' , u'http://www.independent.co.uk/news/education/rss' ) (u'News - People',
,(u'Leading Articles' , u'http://www.independent.co.uk/opinion/leading-articles/rss') u'http://www.independent.co.uk/news/people/?service=rss'),
,(u'Comentators' , u'http://www.independent.co.uk/opinion/commentators/rss' ) (u'News - Science',
,(u'Columnists' , u'http://www.independent.co.uk/opinion/columnists/rss' ) u'http://www.independent.co.uk/news/science/?service=rss'),
,(u'Letters' , u'http://www.independent.co.uk/opinion/letters/rss' ) (u'News - Media',
,(u'Big Question' , u'http://www.independent.co.uk/extras/big-question/rss' ) u'http://www.independent.co.uk/news/media/?service=rss'),
,(u'Sport' , u'http://www.independent.co.uk/sport/rss' ) (u'News - Education',
,(u'Life&Style' , u'http://www.independent.co.uk/life-style/rss' ) u'http://www.independent.co.uk/news/education/?service=rss'),
,(u'Arts&Entertainment' , u'http://www.independent.co.uk/arts-entertainment/rss' ) (u'News - Obituaries',
,(u'Travel' , u'http://www.independent.co.uk/travel/rss' ) u'http://rss.feedsportal.com/c/266/f/3531/index.rss'),
,(u'Money' , u'http://www.independent.co.uk/money/rss' ) (u'News - Corrections',
] u'http://www.independent.co.uk/news/corrections/?service=rss'
),
(u'Opinion',
u'http://www.independent.co.uk/opinion/?service=rss'),
(u'Environment',
u'http://www.independent.co.uk/environment/?service=rss'),
(u'Sport - Athletics',
u'http://www.independent.co.uk/sport/general/athletics/?service=rss'
),
(u'Sport - Cricket',
u'http://www.independent.co.uk/sport/cricket/?service=rss'),
(u'Sport - Football',
u'http://www.independent.co.uk/sport/football/?service=rss'),
(u'Sport - Golf',
u'http://www.independent.co.uk/sport/golf/?service=rss'),
(u'Sport - Motor racing',
u'http://www.independent.co.uk/sport/motor-racing/?service=rss'
),
(u'Sport - Olympics',
u'http://rss.feedsportal.com/c/266/f/3800/index.rss'),
(u'Sport - Racing',
u'http://www.independent.co.uk/sport/racing/?service=rss'),
(u'Sport - Rugby League',
u'http://rss.feedsportal.com/c/266/f/3795/index.rss'),
(u'Sport - Rugby Union',
u'http://www.independent.co.uk/sport/rugby/rugby-union/?service=rss'
),
(u'Sport - Sailing',
u'http://www.independent.co.uk/sport/general/sailing/?service=rss'
),
(u'Sport - Tennis',
u'http://www.independent.co.uk/sport/tennis/?service=rss'),
(u'Sport - Others',
u'http://www.independent.co.uk/sport/general/others/?service=rss'
),
(u'Life & Style - Fashion',
u'http://www.independent.co.uk/life-style/fashion/?service=rss'
),
(u'Life & Style -Food & Drink',
u'http://www.independent.co.uk/life-style/food-and-drink/?service=rss'
),
(u'Life & Style - Health and Families',
u'http://www.independent.co.uk/life-style/health-and-families/?service=rss'
),
(u'Life & Style - House & Home',
u'http://www.independent.co.uk/life-style/house-and-home/'),
(u'Life & Style - History',
u'http://www.independent.co.uk/life-style/history/?service=rss'
),
(u'Life & Style - Gadgets & Tech',
u'http://www.independent.co.uk/life-style/gadgets-and-tech/?service=rss'
),
(u'Life & Style - Motoring',
u'http://www.independent.co.uk/life-style/motoring/?service=rss'
),
(u'Arts & Ents - Art',
u'http://www.independent.co.uk/arts-entertainment/art/?service=rss'
),
(u'Arts & Ents - Architecture',
u'http://www.independent.co.uk/arts-entertainment/architecture/?service=rss'
),
(u'Arts & Ents - Music',
u'http://www.independent.co.uk/arts-entertainment/music/?service=rss'
),
(u'Arts & Ents - Classical',
u'http://www.independent.co.uk/arts-entertainment/classical/?service=rss'
),
(u'Arts & Ents - Films',
u'http://www.independent.co.uk/arts-entertainment/films/?service=rss'
),
(u'Arts & Ents - TV',
u'http://www.independent.co.uk/arts-entertainment/tv/?service=rss'
),
(u'Arts & Ents - Theatre and Dance',
u'http://www.independent.co.uk/arts-entertainment/theatre-dance/?service=rss'
),
(u'Arts & Ents - Comedy',
u'http://www.independent.co.uk/arts-entertainment/comedy/?service=rss'
),
(u'Arts & Ents - Books',
u'http://www.independent.co.uk/arts-entertainment/books/?service=rss'
),
(u'Travel', u'http://www.independent.co.uk/travel/?service=rss'
),
(u'Money', u'http://www.independent.co.uk/money/?service=rss'),
(u'IndyBest',
u'http://www.independent.co.uk/extras/indybest/?service=rss'),
(u'Blogs', u'http://blogs.independent.co.uk/feed/rss/'),
]
def get_article_url(self, article):
return article.get('guid', None)
def preprocess_html(self, soup):
for item in soup.body.findAll(style=True):
del item['style']
for item in soup.body.findAll(['author','preform']):
item.name='span'
for item in soup.body.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for item in soup.body.findAll('div', attrs={'class':['clear-o','body','photoCaption']}):
item.name = 'p'
for item in soup.body.findAll('div'):
if not item.attrs and not item.contents:
item.extract()
soup2 = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
soup2.body.replaceWith(soup.body)
return soup2

View File

@ -44,7 +44,11 @@ class JapanTimes(BasicNewsRecipe):
return rurl.partition('?')[0] return rurl.partition('?')[0]
def print_version(self, url): def print_version(self, url):
return url.replace('/cgi-bin/','/print/') if '/rss/' in url:
return url.replace('.jp/rss/','.jp/print/')
if '/text/' in url:
return url.replace('.jp/text/','.jp/print/')
return url
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

20
recipes/korben.recipe Normal file
View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe


class BasicUserRecipe1318619728(BasicNewsRecipe):
    """Download recent posts from Korben's blog (korben.info)."""

    title = u'Korben'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')]

    def get_masthead_url(self):
        """Return the blog's logo URL for the masthead, or None when the
        image cannot be fetched."""
        url = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(url)
        except:
            # Logo unreachable: log and fall back to no masthead.
            self.log("\nCover unavailable")
            return None
        return url

View File

@ -1,36 +1,35 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>' __copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
''' '''
Profile to download KoreaHerald Profile to download KoreaHerald
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class KoreaHerald(BasicNewsRecipe): class KoreaHerald(BasicNewsRecipe):
title = u'KoreaHerald' title = u'KoreaHerald'
language = 'en' language = 'en'
description = u'Korea Herald News articles' description = u'Korea Herald News articles'
__author__ = 'Seongkyoun Yoo' __author__ = 'Seongkyoun Yoo'
oldest_article = 10 oldest_article = 15
recursions = 3 recursions = 3
max_articles_per_feed = 10 max_articles_per_feed = 15
no_stylesheets = True no_stylesheets = True
keep_only_tags = [ keep_only_tags = [
dict(id=['contentLeft', '_article']) dict(id=['contentLeft', '_article'])
] ]
remove_tags = [ remove_tags = [
dict(name='iframe'), dict(name='iframe'),
dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}), dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}), dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
] ]
feeds = [ feeds = [
('All News','http://www.koreaherald.com/rss/020000000000.xml'), ('National','http://www.koreaherald.com/rss/020100000000.xml'),
('National','http://www.koreaherald.com/rss/020100000000.xml'), ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
('Business','http://www.koreaherald.com/rss/020200000000.xml'), ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'), ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'), ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
('Sports','http://www.koreaherald.com/rss/020500000000.xml'), ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
('Opinion','http://www.koreaherald.com/rss/020600000000.xml'), ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'), ]
]

View File

@ -1,7 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1282101454(BasicNewsRecipe): class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'Kansascity Star' title = 'Kansas City Star'
language = 'en' language = 'en'
__author__ = 'TonytheBookworm' __author__ = 'TonytheBookworm'
description = 'www.kansascity.com feed' description = 'www.kansascity.com feed'

37
recipes/kyungyhang Normal file
View File

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Kyungyhang
'''
from calibre.web.feeds.news import BasicNewsRecipe


class Kyungyhang(BasicNewsRecipe):
    """Recipe for The Kyungyhang Shinmun, a Korean daily newspaper."""

    title = u'Kyungyhang'
    language = 'ko'
    description = u'The Kyungyhang Shinmun articles'
    __author__ = 'Seongkyoun Yoo'

    oldest_article = 20
    recursions = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True

    # Retain just the headline wrapper and the article text container.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['article_title_wrap']}),
        dict(name='div', attrs={'class': ['article_txt']}),
    ]

    # Everything after the bottom-of-article marker is site chrome.
    remove_tags_after = dict(id={'sub_bottom'})

    # Strip ads, navigation and related-article widgets.
    remove_tags = [
        dict(name='iframe'),
        dict(id={'TdHot'}),
        dict(name='div', attrs={'class': ['btn_list', 'bline', 'linebottom', 'bestArticle']}),
        dict(name='dl', attrs={'class': ['CL']}),
        dict(name='ul', attrs={'class': ['tab']}),
    ]

    feeds = [
        ('All News', 'http://www.khan.co.kr/rss/rssdata/total_news.xml'),
    ]

View File

@ -1,32 +1,37 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini' __author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>' __copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version' description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
''' '''
http://www.repubblica.it/ http://www.repubblica.it/
''' '''
import re import re
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class LaRepubblica(BasicNewsRecipe): class LaRepubblica(BasicNewsRecipe):
title = 'La Repubblica' title = 'La Repubblica'
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic' __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.' description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png' masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
publisher = 'Gruppo editoriale L\'Espresso' publisher = 'Gruppo editoriale L\'Espresso'
category = 'News, politics, culture, economy, general interest' category = 'News, politics, culture, economy, general interest'
language = 'it' language = 'it'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
oldest_article = 5 oldest_article = 5
encoding = 'utf8' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
#recursion = 10 no_stylesheets = True
no_stylesheets = True publication_type = 'newspaper'
extra_css = """ articles_are_obfuscated = True
img{display: block} temp_files = []
""" extra_css = """
img{display: block}
"""
remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'), (re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
@ -35,11 +40,28 @@ class LaRepubblica(BasicNewsRecipe):
] ]
def get_article_url(self, article): def get_article_url(self, article):
link = article.get('id', article.get('guid', None)) link = BasicNewsRecipe.get_article_url(self, article)
if link is None: if link and not '.repubblica.it/' in link:
return article link2 = article.get('id', article.get('guid', None))
return link if link2:
link = link2
return link.rpartition('?')[0]
def get_obfuscated_article(self, url):
count = 0
while (count < 10):
try:
response = self.browser.open(url)
html = response.read()
count = 10
except:
print "Retrying download..."
count += 1
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
keep_only_tags = [ keep_only_tags = [
dict(attrs={'class':'articolo'}), dict(attrs={'class':'articolo'}),
dict(attrs={'class':'body-text'}), dict(attrs={'class':'body-text'}),
@ -49,7 +71,7 @@ class LaRepubblica(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name=['object','link','meta']), dict(name=['object','link','meta','iframe','embed']),
dict(name='span',attrs={'class':'linkindice'}), dict(name='span',attrs={'class':'linkindice'}),
dict(name='div', attrs={'class':'bottom-mobile'}), dict(name='div', attrs={'class':'bottom-mobile'}),
dict(name='div', attrs={'id':['rssdiv','blocco']}), dict(name='div', attrs={'id':['rssdiv','blocco']}),
@ -80,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe):
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml') (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
] ]
def preprocess_html(self, soup):
for item in soup.findAll(['hgroup','deresponsabilizzazione','per']):
item.name = 'div'
item.attrs = []
for item in soup.findAll(style=True):
del item['style']
return soup

75
recipes/lepoint.recipe Normal file
View File

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'

'''
LePoint.fr
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


class lepoint(BasicNewsRecipe):
    """Recipe for Le Point (lepoint.fr), a French weekly news magazine."""

    title = 'Le Point'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'LePoint.fr'
    category = 'news, France, world'
    language = 'fr'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
        .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        '''

    # Strip columnist headers, sidebars, signatures and sharing toolbars.
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['entete_chroniqueur']}),
        dict(name='div', attrs={'class': ['col_article']}),
        dict(name='div', attrs={'class': ['signature_article']}),
        dict(name='div', attrs={'class': ['util_font util_article']}),
        dict(name='div', attrs={'class': ['util_article bottom']}),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': ['page_article']})]
    remove_tags_after = dict(name='div', attrs={'class': ['util_article bottom']})

    feeds = [
        (u'À la une', 'http://www.lepoint.fr/rss.xml'),
        ('International', 'http://www.lepoint.fr/monde/rss.xml'),
        ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
        ('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        (u'Socièté', 'http://www.lepoint.fr/societe/rss.xml'),
        ('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
        (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
        ('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
        (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
        ('Sport', 'http://www.lepoint.fr/sport/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Drop inline style attributes so extra_css controls presentation."""
        for tag in soup.findAll(style=True):
            del tag['style']
        return soup

    def get_masthead_url(self):
        """Return the site logo URL for the masthead, or None if unreachable."""
        url = 'http://www.lepoint.fr/images/commun/logo.png'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(url)
        except:
            self.log("\nCover unavailable")
            return None
        return url

73
recipes/lexpress.recipe Normal file
View File

@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'

'''
Lexpress.fr
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


# NOTE(review): the class is named 'lepoint' — a leftover from the Le Point
# recipe this file was copied from. The name is kept unchanged here because it
# is the block's public identifier; renaming it would be a separate change.
class lepoint(BasicNewsRecipe):
    """Recipe for L'Express (lexpress.fr), a French weekly news magazine."""

    title = 'L\'express'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'cp1252'
    publisher = 'LExpress.fr'
    category = 'Actualité, France, Monde'
    language = 'fr'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # 'font-weiht' typo fixed to 'font-weight' so the .entete rule applies.
    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        #contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        .entete { font-weight:bold;}
        '''

    # Strip sharing toolbars, the table of contents and the article footer.
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['barre-outil-fb']}),
        dict(name='div', attrs={'class': ['barre-outils']}),
        dict(id='bloc-sommaire'),
        dict(id='footer-article'),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': ['bloc-article']})]
    remove_tags_after = dict(id='content-article')

    feeds = [
        (u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'),
        ('International', 'http://www.lexpress.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'),
        (u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'),
        # Typo fixed: was 'Envronnement'.
        (u'Environnement', 'http://www.lexpress.fr/rss/environnement.xml'),
        # Was http://www.lepoint.fr/economie/rss.xml — a copy-paste leftover
        # from the Le Point recipe; point at L'Express's own economy feed,
        # matching the URL pattern of the other feeds above.
        ('Economie', 'http://www.lexpress.fr/rss/economie.xml'),
        (u'Socièté', 'http://www.lexpress.fr/rss/societe.xml'),
        ('Politique', 'http://www.lexpress.fr/rss/politique.xml'),
        (u'Médias', 'http://www.lexpress.fr/rss/medias.xml'),
        ('Culture', 'http://www.lexpress.fr/rss/culture.xml'),
        ('Sport', 'http://www.lexpress.fr/rss/sport.xml'),
    ]

    def preprocess_html(self, soup):
        """Drop inline style attributes so extra_css controls presentation."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """Return the L'Express logo URL for the masthead, or None if it is
        unreachable."""
        masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        except:
            self.log("\nCover unavailable")
            masthead = None
        return masthead

View File

@ -9,39 +9,72 @@ liberation.fr
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Liberation(BasicNewsRecipe): class Liberation(BasicNewsRecipe):
title = u'Liberation' title = u'Liberation'
__author__ = 'Darko Miletic' __author__ = 'calibre'
description = 'News from France' description = 'Actualités'
language = 'fr' category = 'Actualités, France, Monde'
language = 'fr'
oldest_article = 7 use_embedded_content = False
max_articles_per_feed = 100 timefmt = ' [%d %b %Y]'
no_stylesheets = True max_articles_per_feed = 15
use_embedded_content = False no_stylesheets = True
remove_empty_feeds = True
filterDuplicates = True
html2lrf_options = ['--base-font-size', '10'] extra_css = '''
h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
'''
keep_only_tags = [ keep_only_tags = [
dict(name='h1') dict(name='div', attrs={'class':'article'})
#,dict(name='div', attrs={'class':'object-content text text-item'}) ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
,dict(name='div', attrs={'class':'article'}) ,dict(name='div', attrs={'class':'entry'})
#,dict(name='div', attrs={'class':'articleContent'}) ,dict(name='div', attrs={'class':'col_contenu'})
,dict(name='div', attrs={'class':'entry'}) ]
]
remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ] remove_tags_after = [
dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
,dict(name='p',attrs={'class':['chapo']})
,dict(id='_twitter_facebook')
]
remove_tags = [ remove_tags = [
dict(name='p', attrs={'class':'clear'}) dict(name='iframe')
,dict(name='ul', attrs={'class':'floatLeft clear'}) ,dict(name='a', attrs={'class':'lnk-comments'})
,dict(name='div', attrs={'class':'clear floatRight'}) ,dict(name='div', attrs={'class':'toolbox'})
,dict(name='object') ,dict(name='ul', attrs={'class':'share-box'})
,dict(name='div', attrs={'class':'toolbox'}) ,dict(name='ul', attrs={'class':'tool-box'})
,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'}) ,dict(name='ul', attrs={'class':'rub'})
#,dict(name='div', attrs={'class':'clear block block-call-items'}) ,dict(name='p',attrs={'class':['chapo']})
,dict(name='div', attrs={'class':'block-content'}) ,dict(name='p',attrs={'class':['tag']})
,dict(name='div',attrs={'class':['blokLies']})
,dict(name='div',attrs={'class':['alire']})
,dict(id='_twitter_facebook')
] ]
feeds = [ feeds = [
(u'La une', u'http://www.liberation.fr/rss/laune') (u'La une', u'http://rss.liberation.fr/rss/9/')
,(u'Monde' , u'http://www.liberation.fr/rss/monde') ,(u'Monde' , u'http://www.liberation.fr/rss/10/')
,(u'Sports', u'http://www.liberation.fr/rss/sports') ,(u'Économie', u'http://www.liberation.fr/rss/13/')
,(u'Politiques', u'http://www.liberation.fr/rss/11/')
,(u'Société', u'http://www.liberation.fr/rss/12/')
,(u'Cinéma', u'http://www.liberation.fr/rss/58/')
,(u'Écran', u'http://www.liberation.fr/rss/53/')
,(u'Sports', u'http://www.liberation.fr/rss/12/')
] ]
def get_masthead_url(self):
masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead

View File

@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
publication_type = 'newspaper' publication_type = 'newspaper'
delay = 1 delay = 1
remove_empty_feeds = True remove_empty_feeds = True
cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg') cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg')
masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg' masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
extra_css = """ body{font-family: Arial,Helvetica,sans-serif } extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em} img{margin-bottom: 0.4em}

43
recipes/men24_gr.recipe Normal file
View File

@ -0,0 +1,43 @@
from calibre.web.feeds.recipes import BasicNewsRecipe


class Men24(BasicNewsRecipe):
    """Recipe for Men24.gr, a Greek men's lifestyle portal, fetched via its
    category RSS feeds and the site's printer-friendly pages."""

    title = 'Men24.gr'
    __author__ = 'Stelios'
    description = 'Greek Mens portal'
    oldest_article = 14
    max_articles_per_feed = 100
    cover_url = 'http://www.men24.gr/ast/img/men24Logo.jpg'
    category = 'magazines, GR'
    # NOTE: the original assigned language = 'el' twice; the duplicate
    # assignment was redundant and has been removed.
    language = 'el'
    encoding = 'windows-1253'
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = '''
        .artPrintTitle{font-family :Arial,Helvetica,sans-serif; font-weight: bold; font-size:large;}
        .artPrintSubtitle{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
        '''

    # Strip the category banner, the page footer and all images from the
    # print-version pages.
    remove_tags = [
        dict(name='td', attrs={'class': ['artPrintCategory']}),
        dict(name='table', attrs={'class': ['footer']}),
        dict(name='img'),
    ]

    # Feed titles are Greek, written with unicode escapes to stay
    # encoding-agnostic in source form.
    feeds = [
        (u'\u038C\u03BB\u03B5\u03C2 \u03BF\u03B9 \u03B5\u03B9\u03B4\u03AE\u03C3\u03B5\u03B9\u03C2', 'http://www.men24.gr/svc/rss/lastNews/'),
        (u'\u03A3\u03C4\u03C5\u03BB', 'http://www.men24.gr/svc/rss/categoryNews/?category=style'),
        (u'Fitness', 'http://www.men24.gr/svc/rss/categoryNews/?category=fitness'),
        (u'Gadgets', 'http://www.men24.gr/svc/rss/categoryNews/?category=gadgets'),
        (u'\u0394\u03B9\u03B1\u03C3\u03BA\u03AD\u03B4\u03B1\u03C3\u03B7', 'http://www.men24.gr/svc/rss/categoryNews/?category=fun'),
        (u'\u03A7\u03C1\u03AE\u03BC\u03B1 \u03BA\u03B1\u03B9 \u039A\u03B1\u03C1\u03B9\u03AD\u03C1\u03B1', 'http://www.men24.gr/svc/rss/categoryNews/?category=money'),
        (u'Special Edition', 'http://www.men24.gr/svc/rss/categoryNews/?category=special'),
        (u'\u0388\u03C1\u03C9\u03C4\u03B1\u03C2 \u03BA\u03B1\u03B9 Sex', 'http://www.men24.gr/svc/rss/categoryNews/?category=love'),
        (u'\u0386\u03BD\u03C4\u03C1\u03B5\u03C2 \u03C4\u03BF\u03C5 24', 'http://www.men24.gr/svc/rss/categoryNews/?category=men'),
        (u'\u0393\u03C5\u03BD\u03B1\u03AF\u03BA\u03B5\u03C2', 'http://www.men24.gr/svc/rss/categoryNews/?category=women'),
        (u'\u039F\u03B4\u03B7\u03B3\u03BF\u03AF', 'http://www.men24.gr/svc/rss/categoryNews/?category=guides'),
        (u'\u03A4\u03B6\u03CC\u03B3\u03BF\u03C2', 'http://www.men24.gr/svc/rss/categoryNews/?category=gamble')
    ]

    def print_version(self, url):
        """Map an article URL to its printer-friendly '.print.asp' version."""
        return url.replace('.asp', '.print.asp')

View File

@ -1,9 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image
''' Version 1.2, updated cover image to match the changed website.
added info date on title
version 1.4 Updated tags, delay and added autoclean 22-09-2011
version 1.5 Changes due to changes in site
version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
Added som processing on pictures
Removed links in html
Removed extre white characters
changed handling of self closing span
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe): class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro Nieuws NL' title = u'Metro Nieuws NL'
# Version 1.2, updated cover image to match the changed website.
# added info date on title
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
__author__ = u'DrMerry' __author__ = u'DrMerry'
@ -11,8 +23,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
language = u'nl' language = u'nl'
simultaneous_downloads = 5 simultaneous_downloads = 5
#delay = 1 #delay = 1
auto_cleanup = True #auto_cleanup = True
auto_cleanup_keep = '//div[@class="article-image-caption-2column"]|//div[@id="date"]' #auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
timefmt = ' [%A, %d %b %Y]' timefmt = ' [%A, %d %b %Y]'
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
@ -20,22 +32,73 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg' cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
publication_type = 'newspaper' publication_type = 'newspaper'
remove_tags_before = dict(name='div', attrs={'id':'date'}) remove_tags_before = dict(name='div', attrs={'id':'date'})
remove_tags_after = dict(name='div', attrs={'id':'column-1-3'}) remove_tags_after = dict(name='div', attrs={'class':'article-body'})
encoding = 'utf-8' encoding = 'utf-8'
extra_css = 'body{font-size:12px} #date, .article-image-caption {font-size: 0.583em} h2 {font-size: 0.917em} p.small, span, li, li span span, p, b, i, u, p.small.article-paragraph, p.small.article-paragraph p, p.small.article-paragraph span, p span, span {font-size: 0.833em} h1 {font-size: 1em}' remove_attributes = ['style', 'font', 'width', 'height']
use_embedded_content = False
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
.article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
.article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\
div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\
p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}),
dict(name='div', attrs={'id':['date']}),
dict(name='h1', attrs={'class':['title']}),
dict(name='h2', attrs={'class':['subtitle']})]
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap', remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links', 'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}), 'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar']}), dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
dict(name='iframe')] dict(name='iframe')]
preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->)', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
(re.compile(r'([\s>])([^\s>]+)(<span[^>]+) />', re.DOTALL|re.IGNORECASE),
lambda match: match.group(1) + match.group(3) + '>' + match.group(2) + '</span>'),
]
def postprocess_html(self, soup, first):
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
#width, height = img.size
#print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
img.trim(0)
img.save(iurl)
'''
#width, height = img.size
#print '***TRIMMED img width is: ', width, 'height is: ', height
left=0
top=0
border_color='#ffffff'
width, height = img.size
#print '***retrieved img width is: ', width, 'height is: ', height
height_correction = 1.17
canvas = create_canvas(width, height*height_correction,border_color)
canvas.compose(img, left, top)
#img = canvas
canvas.save(iurl)
#width, height = canvas.size
#print '***NEW img width is: ', width, 'height is: ', height
'''
return soup
feeds = [ feeds = [
(u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'), (u'Binnenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-3'),
(u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'), (u'Economie', u'http://www.metronieuws.nl/rss.xml?c=1278070988-0'),
(u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'), (u'Den Haag', u'http://www.metronieuws.nl/rss.xml?c=1289013337-3'),
(u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'), (u'Rotterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-2'),
(u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'), (u'Amsterdam', u'http://www.metronieuws.nl/rss.xml?c=1289013337-1'),
(u'Buitenland', u'http://www.metronieuws.nl/rss.xml?c=1277377288-4'),
(u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'), (u'Columns', u'http://www.metronieuws.nl/rss.xml?c=1277377288-17'),
(u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'), (u'Entertainment', u'http://www.metronieuws.nl/rss.xml?c=1277377288-2'),
(u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'), (u'Dot', u'http://www.metronieuws.nl/rss.xml?c=1283166782-12'),

View File

@ -4,26 +4,31 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto # Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Hong Kong' __Region__ = 'Hong Kong'
# Users of Kindle 3 with limited system-level CJK support # Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False". # please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True __MakePeriodical__ = True
# Turn below to True if your device supports display of CJK titles # Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False __UseChineseTitle__ = False
# Set it to False if you want to skip images # Set it to False if you want to skip images (Default: True)
__KeepImages__ = True __KeepImages__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source # (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True __UseLife__ = True
# (HK only) It is to disable the column section which is now a premium content # (HK only) It is to disable premium content (Default: False)
__InclCols__ = False __InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats # (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = False __ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images # (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False __HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''
''' '''
Change Log: Change Log:
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles 2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional. 2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly. 2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free. 2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@ -72,7 +77,7 @@ class MPRecipe(BasicNewsRecipe):
dict(attrs={'class':['content']}), # for content from txt dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}), dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt dict(attrs={'class':['images']}) # for images from txt
] ]
if __KeepImages__: if __KeepImages__:
@ -169,13 +174,22 @@ class MPRecipe(BasicNewsRecipe):
return dt_local return dt_local
def get_fetchdate(self): def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d") if __Date__ <> '':
return __Date__
else:
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self): def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d") if __Date__ <> '':
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchday(self): def get_fetchday(self):
return self.get_dtlocal().strftime("%d") if __Date__ <> '':
return __Date__[6:8]
else:
return self.get_dtlocal().strftime("%d")
def get_cover_url(self): def get_cover_url(self):
if __Region__ == 'Hong Kong': if __Region__ == 'Hong Kong':
@ -208,18 +222,21 @@ class MPRecipe(BasicNewsRecipe):
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal') (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]: ]:
articles = self.parse_section2(url, keystr) if __InclPremium__ == True:
articles = self.parse_section2_txt(url, keystr)
else:
articles = self.parse_section2(url, keystr)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
if __InclCols__ == True: if __InclPremium__ == True:
# parse column section articles directly from .txt files # parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl') for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]: ]:
articles = self.parse_section2_txt(url, keystr) articles = self.parse_section2_txt(url, keystr)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url) articles = self.parse_section(url)
@ -253,10 +270,10 @@ class MPRecipe(BasicNewsRecipe):
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles)) # feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]: for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
articles = self.parse_section2(url, keystr) articles = self.parse_section2_txt(url, keystr)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: # (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url) # articles = self.parse_section(url)
@ -270,18 +287,18 @@ class MPRecipe(BasicNewsRecipe):
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal') for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]: ]:
articles = self.parse_section2(url, keystr) articles = self.parse_section2_txt(url, keystr)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
if __InclCols__ == True: if __InclPremium__ == True:
# parse column section articles directly from .txt files # parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl') for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]: ]:
articles = self.parse_section2_txt(url, keystr) articles = self.parse_section2_txt(url, keystr)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url) articles = self.parse_section(url)
@ -333,7 +350,7 @@ class MPRecipe(BasicNewsRecipe):
url = 'http://news.mingpao.com/' + dateStr + '/' +url url = 'http://news.mingpao.com/' + dateStr + '/' +url
# replace the url to the print-friendly version # replace the url to the print-friendly version
if __ParsePFF__ == True: if __ParsePFF__ == True:
if url.rfind('Redirect') <> -1: if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
url = re.sub(dateStr + '.*' + dateStr, dateStr, url) url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
url = re.sub('%2F.*%2F', '/', url) url = re.sub('%2F.*%2F', '/', url)
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '') title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
@ -349,6 +366,8 @@ class MPRecipe(BasicNewsRecipe):
# parse from life.mingpao.com # parse from life.mingpao.com
def parse_section2(self, url, keystr): def parse_section2(self, url, keystr):
br = mechanize.Browser()
br.set_handle_redirect(False)
self.get_fetchdate() self.get_fetchdate()
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
a = soup.findAll('a', href=True) a = soup.findAll('a', href=True)
@ -359,9 +378,13 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(i) title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False) url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article try:
current_articles.append({'title': title, 'url': url, 'description': ''}) br.open_novisit(url)
included_urls.append(url) url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
except:
print 'skipping a premium article'
current_articles.reverse() current_articles.reverse()
return current_articles return current_articles
@ -382,7 +405,7 @@ class MPRecipe(BasicNewsRecipe):
included_urls.append(url) included_urls.append(url)
current_articles.reverse() current_articles.reverse()
return current_articles return current_articles
# parse from www.mingpaovan.com # parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl): def parse_section3(self, url, baseUrl):
self.get_fetchdate() self.get_fetchdate()
@ -467,53 +490,8 @@ class MPRecipe(BasicNewsRecipe):
# preprocess those .txt and javascript based files # preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url): def preprocess_raw_html(self, raw_html, url):
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010') new_html = raw_html
if __HiResImg__ == True: if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', raw_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
raw_html = raw_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
raw_html = raw_html.replace(img, newimg)
else:
# if not found, insert _ after "
raw_html = raw_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', raw_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
gifimg = img.replace('jpg\'', 'gif\'')
try:
#print 'Original: ', url
#print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
gifurl = re.sub(r'dailynews.*txt', '', url)
#print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
#print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
#br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
raw_html = raw_html.replace(img, gifimg)
except:
#print 'GIF not found'
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'newimg: ', newimg
raw_html = raw_html.replace(img, newimg)
if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
return raw_html
else:
if url.rfind('_print.htm') <> -1: if url.rfind('_print.htm') <> -1:
# javascript based file # javascript based file
splitter = re.compile(r'\n') splitter = re.compile(r'\n')
@ -548,8 +526,8 @@ class MPRecipe(BasicNewsRecipe):
photo = photo.replace('</td>', '<br>') photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '') photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>' new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
return new_raw_html + '</body></html>' new_html = new_raw_html + '</body></html>'
else: else:
# .txt based file # .txt based file
splitter = re.compile(r'\n') # Match non-digits splitter = re.compile(r'\n') # Match non-digits
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">' new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
@ -557,28 +535,105 @@ class MPRecipe(BasicNewsRecipe):
title_started = False title_started = False
met_article_start_char = False met_article_start_char = False
for item in splitter.split(raw_html): for item in splitter.split(raw_html):
item = item.strip()
if item.startswith(u'\u3010'): if item.startswith(u'\u3010'):
met_article_start_char = True met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n' new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else: else:
if next_is_img_txt == False: if next_is_img_txt == False:
if item.startswith('='): if item.startswith("=@"):
print 'skip movie link'
elif item.startswith("=?"):
next_is_img_txt = True next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n' new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else: else:
if met_article_start_char == False: if next_is_img_txt == False and met_article_start_char == False:
if title_started == False: if item <> '':
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n' if title_started == False:
title_started = True #print 'Title started at ', item
else: new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
new_raw_html = new_raw_html + item + '\n' title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else: else:
new_raw_html = new_raw_html + item + '<p>\n' new_raw_html = new_raw_html + item + '<p>\n'
else: else:
next_is_img_txt = False next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n' new_raw_html = new_raw_html + item + '\n'
return new_raw_html + '</div></body></html>' new_html = new_raw_html + '</div></body></html>'
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
@ -587,7 +642,7 @@ class MPRecipe(BasicNewsRecipe):
for item in soup.findAll(stype=True): for item in soup.findAll(stype=True):
del item['absmiddle'] del item['absmiddle']
return soup return soup
def create_opf(self, feeds, dir=None): def create_opf(self, feeds, dir=None):
if dir is None: if dir is None:
dir = self.output_dir dir = self.output_dir
@ -678,7 +733,7 @@ class MPRecipe(BasicNewsRecipe):
if po is None: if po is None:
self.play_order_counter += 1 self.play_order_counter += 1
po = self.play_order_counter po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'), parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc) play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages: for sp in a.sub_pages:

View File

@ -8,7 +8,7 @@ class AdvancedUserRecipe1294342201(BasicNewsRecipe):
title = u'New London Day' title = u'New London Day'
__author__ = 'Being' __author__ = 'Being'
description = 'State, local and business news from New London, CT' description = 'State, local and business news from New London, CT'
language = 'en_GB' language = 'en'
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 200 max_articles_per_feed = 200

48
recipes/newsbeast.recipe Normal file
View File

@ -0,0 +1,48 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class newsbeast(BasicNewsRecipe):
title = 'Newsbeast'
__author__ = 'Stelios'
description = 'News from Greece'
oldest_article = 2
max_articles_per_feed = 100
publisher = 'newsbeast'
category = 'news, GR'
language = 'el'
encoding = 'utf8'
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
encoding = 'utf8'
keep_only_tags = [
dict(name='div', attrs={'class' : ['article-title']}),
# dict(name='img', attrs={'class' : ['article_photo']}),
#If enabled feeds exceede 15MB
dict(name='div', attrs={'class' : ['txt']})
]
remove_tags = [
dict(name='table', attrs={'id':['artFoot']}),
dict(name='img'),
#If removed feeds exceede 15MB
dict(name='p', attrs={'class':['article-details']})
]
feeds = [
(u'\u0395\u03BB\u03BB\u03AC\u03B4\u03B1', 'http://www.newsbeast.gr/feeds/greece'),
(u'\u039A\u03CC\u03C3\u03BC\u03BF\u03C2', 'http://www.newsbeast.gr/feeds/world'),
(u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03BA\u03AE', 'http://www.newsbeast.gr/feeds/politiki'),
(u'\u039F\u03B9\u03BA\u03BF\u03BD\u03BF\u03BC\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/financial'),
(u'\u0391\u03B8\u03BB\u03B7\u03C4\u03B9\u03BA\u03AC', 'http://www.newsbeast.gr/feeds/sports'),
(u'\u039A\u03BF\u03B9\u03BD\u03C9\u03BD\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/society'),
(u'\u03A0\u03B5\u03C1\u03B9\u03B2\u03AC\u03BB\u03BB\u03BF\u03BD', 'http://www.newsbeast.gr/feeds/environment'),
(u'Media', 'http://www.newsbeast.gr/feeds/media'),
(u'\u0394\u03B9\u03B1\u03C3\u03BA\u03AD\u03B4\u03B1\u03C3\u03B7', 'http://www.newsbeast.gr/feeds/entertainment'),
(u'Lifestyle', 'http://www.newsbeast.gr/feeds/lifestyle'),
(u'\u03A4\u03B5\u03C7\u03BD\u03BF\u03BB\u03BF\u03B3\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/technology'),
(u'\u0391\u03C5\u03C4\u03BF\u03BA\u03AF\u03BD\u03B7\u03C4\u03BF', 'http://www.newsbeast.gr/feeds/car'),
(u'\u0393\u03C5\u03BD\u03B1\u03AF\u03BA\u03B1', 'http://www.newsbeast.gr/feeds/woman'),
(u'\u03A5\u03B3\u03B5\u03AF\u03B1', 'http://www.newsbeast.gr/feeds/health'),
(u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://www.newsbeast.gr/feeds/culture'),
(u'\u038C,\u03C4\u03B9 \u03BD\u03B1 \u03BD\u03B1\u03B9', 'http://www.newsbeast.gr/feeds/weird')
]

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.nin.co.rs www.nin.co.rs
''' '''
@ -29,6 +29,7 @@ class Nin(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
language = 'sr' language = 'sr'
publication_type = 'magazine' publication_type = 'magazine'
masthead_url = 'http://www.nin.co.rs/img/head/logo.jpg'
extra_css = """ extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Verdana, Lucida, sans1, sans-serif} body{font-family: Verdana, Lucida, sans1, sans-serif}
@ -72,9 +73,11 @@ class Nin(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
cover_url = None cover_url = None
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.INDEX)
link_item = soup.find('img',attrs={'width':'100','border':'0'}) for item in soup.findAll('a', href=True):
if link_item: if item['href'].startswith('/pages/issue.php?id='):
cover_url = self.PREFIX + link_item['src'] simg = item.find('img')
if simg:
return self.PREFIX + item.img['src']
return cover_url return cover_url
def parse_index(self): def parse_index(self):

View File

@ -10,9 +10,8 @@ class AdvancedUserRecipe1286819935(BasicNewsRecipe):
remove_attributes = ['style'] remove_attributes = ['style']
language = 'ru' language = 'ru'
feeds = [(u'Articles', u'http://www.novayagazeta.ru/rss_number.xml')] feeds = [(u'Articles', u'http://www.novayagazeta.ru/rss/all.xml')]
def print_version(self, url): def print_version(self, url):
return url + '?print=true' return '%s%s' % (url, '?print=1')

20
recipes/omgubuntu.recipe Normal file
View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1318619832(BasicNewsRecipe):
title = u'OmgUbuntu'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')]
def get_masthead_url(self):
masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead

47
recipes/phoronix.recipe Normal file
View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
Fetch phoronix.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class cdnet(BasicNewsRecipe):
title = 'Phoronix'
__author__ = 'calibre'
description = 'Actualités Phoronix'
encoding = 'utf-8'
publisher = 'Phoronix.com'
category = 'news, IT, linux'
language = 'en'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 25
no_stylesheets = True
remove_empty_feeds = True
filterDuplicates = True
extra_css = '''
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
.KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
'''
remove_tags = []
remove_tags_before = dict(id='phxcms_content_phx')
remove_tags_after = dict(name='div', attrs={'class':'KonaBody'})
feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

26
recipes/protagon.recipe Normal file
View File

@ -0,0 +1,26 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class protagon(BasicNewsRecipe):
title = 'Protagon'
__author__ = 'Stelios'
description = 'Opinion articles in Greek'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'Various'
category = 'GR'
language = 'el'
encoding = 'utf8'
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
keep_only_tags = [
dict(name='h1', attrs={'id' : ['title']}),
dict(name='div', attrs={'class' : ['freetext']})
]
feeds = [
(u'\u0398\u03AD\u03BC\u03B1\u03C4\u03B1', 'http://www.protagon.gr/rss?i=protagon.el.8emata')
]

View File

@ -0,0 +1,19 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Real_world_economics_review(BasicNewsRecipe):
title = u'Real-world economis review blog'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
__author__ = 'Julio Map'
language = 'en'
no_stylesheets = True
keep_only_tags = dict(name='div', attrs={'id':'main'})
remove_tags = [dict(name='div', attrs={'id':'postpath'}),
dict(name='div', attrs={'class':'robots-nocontent sd-block sd-social sd-social-icon-text sd-sharing'}),
dict(name='div', attrs={'class':'sharedaddy sd-sharing-enabled'})
]
feeds = [(u'Real-World Economics Review Blog', u'http://rwer.wordpress.com/feed/')]

View File

@ -27,12 +27,12 @@ class ScienceAAS(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.open(self.LOGIN) br.open(self.LOGIN)
br.select_form(name='registered_users_form') br.select_form(nr=1)
br['username'] = self.username br['username'] = self.username
br['code' ] = self.password br['code' ] = self.password
br.submit() br.submit()
return br return br
keep_only_tags = [ dict(name='div', attrs={'id':'LegacyContent'}) ] keep_only_tags = [ dict(name='div', attrs={'id':'content-block'}) ]
feeds = [(u"Science: Current Issue", u'http://www.sciencemag.org/rss/current.xml')] feeds = [(u"Science: Current Issue", u'http://www.sciencemag.org/rss/current.xml')]

View File

@ -40,7 +40,7 @@ class Sciencenews(BasicNewsRecipe):
,dict(name='div', attrs={'class': 'embiggen'}) ,dict(name='div', attrs={'class': 'embiggen'})
] ]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')] feeds = [(u"Science News / News Items", u'http://sciencenews.org/index.php/feed/type/news/name/news.rss/view/feed/name/all.rss')]
def get_cover_url(self): def get_cover_url(self):
cover_url = None cover_url = None

14
recipes/sigma_live.recipe Normal file
View File

@ -0,0 +1,14 @@
from calibre.web.feeds.news import BasicNewsRecipe
class sigmalive(BasicNewsRecipe):
    """Cypriot news from sigmalive.com, using calibre's automatic cleanup."""

    title = u'SigmaLive'
    __author__ = 'Stelios'
    description = 'Cypriot News'
    category = 'news, CY'
    language = 'el'
    encoding = 'utf8'

    oldest_article = 7
    max_articles_per_feed = 100
    # No keep/remove tag lists: rely on calibre's heuristic article extraction.
    auto_cleanup = True

    feeds = [
        (u'sigmalive', u'http://sigmalive.com/rss/latest'),
    ]

View File

@ -0,0 +1,22 @@
__license__ = 'GPL v3'
__copyright__ = '2011 Neil Grogan'
#
# Silicon Republic Recipe
#
from calibre.web.feeds.news import BasicNewsRecipe
class SiliconRepublic(BasicNewsRecipe):
    """Irish technology news from siliconrepublic.com."""

    title = u'Silicon Republic'
    __author__ = u'Neil Grogan'
    language = 'en_IE'
    oldest_article = 7
    max_articles_per_feed = 100

    # Strip site chrome: navigation, search, video/news panels, adverts,
    # social widgets and "also in" cross-links — plus scripts and styles.
    remove_tags = [
        dict(attrs={'class': [
            'thumb', 'txt', 'compactbox', 'icons', 'catlist', 'catlistinner',
            'taglist', 'taglistinner', 'social', 'also-in', 'also-in-inner',
            'also-in-footer', 'zonek-dfp', 'paneladvert', 'rcadvert', 'panel',
            'h2b',
        ]}),
        dict(id=[
            'header', 'logo', 'header-right', 'sitesearch', 'rsslinks',
            'topnav', 'topvideos', 'topvideos-list', 'topnews', 'topnews-list',
            'slideshow', 'slides', 'compactheader', 'compactnews',
            'compactfeatures', 'article-type', 'contactlinks-header',
            'banner-zone-k-dfp', 'footer-related', 'directory-services',
            'also-in-section', 'featuredrelated1', 'featuredrelated2',
            'featuredrelated3', 'featuredrelated4', 'advert2-dfp',
        ]),
        dict(name=['script', 'style']),
    ]

    feeds = [
        (u'News', u'http://www.siliconrepublic.com/feeds/'),
    ]

37
recipes/skai.recipe Normal file
View File

@ -0,0 +1,37 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class SKAI(BasicNewsRecipe):
    """Greek news from skai.gr (feeds served through FeedBurner)."""

    title = 'SKAI'
    __author__ = 'Stelios'
    description = 'News from Greece'
    publisher = 'skai.gr'
    category = 'news, GR'
    language = 'el'
    # Fix: 'encoding' was assigned twice with the same value; keep one.
    encoding = 'utf8'

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    # Keep only the headline and the article text container.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class': ['articleText']}),
    ]

    feeds = [
        (u'\u039A\u03C5\u03C1\u03B9\u03CC\u03C4\u03B5\u03C1\u03B5\u03C2 \u0395\u03B9\u03B4\u03AE\u03C3\u03B5\u03B9\u03C2', 'http://feeds.feedburner.com/skai/Uulu'),
        (u'\u0395\u03BB\u03BB\u03AC\u03B4\u03B1', 'http://feeds.feedburner.com/skai/PLwa'),
        (u'\u039A\u03CC\u03C3\u03BC\u03BF\u03C2', 'http://feeds.feedburner.com/skai/aqOL'),
        (u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03BA\u03AE', 'http://feeds.feedburner.com/skai/yinm'),
        (u'\u039F\u03B9\u03BA\u03BF\u03BD\u03BF\u03BC\u03AF\u03B1', 'http://feeds.feedburner.com/skai/oPUt'),
        (u'\u03A4\u03B5\u03C7\u03BD\u03BF\u03BB\u03BF\u03B3\u03AF\u03B1', 'http://feeds.feedburner.com/skai/fqsg'),
        (u'\u0391\u03B8\u03BB\u03B7\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://feeds.feedburner.com/skai/TfmK'),
        (u'\u03A5\u03B3\u03B5\u03AF\u03B1', 'http://feeds.feedburner.com/skai/TABn'),
        (u'\u03A0\u03BF\u03BB\u03B9\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://feeds.feedburner.com/skai/ppGl'),
        (u'\u0391\u03C5\u03C4\u03BF\u03BA\u03AF\u03BD\u03B7\u03C3\u03B7', 'http://feeds.feedburner.com/skai/HCCc'),
        (u'\u03A0\u03B5\u03C1\u03B9\u03B2\u03AC\u03BB\u03BB\u03BF\u03BD', 'http://feeds.feedburner.com/skai/jVWs'),
        (u'\u03A0\u03B1\u03C1\u03AC\u03BE\u03B5\u03BD\u03B1', 'http://feeds.feedburner.com/skai/bpAR'),
    ]

View File

@ -12,21 +12,18 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DailyTelegraph(BasicNewsRecipe): class DailyTelegraph(BasicNewsRecipe):
title = u'The Australian' title = u'The Australian'
__author__ = u'Matthew Briggs and Sujata Raman' __author__ = u'Matthew Briggs and Sujata Raman'
description = u'National broadsheet newspaper from down under - colloquially known as The Oz' description = (u'National broadsheet newspaper from down under - colloquially known as The Oz'
'. You will need to have a subscription to '
'http://www.theaustralian.com.au to get full articles.')
language = 'en_AU' language = 'en_AU'
oldest_article = 2 oldest_article = 2
needs_subscription = 'optional'
max_articles_per_feed = 30 max_articles_per_feed = 30
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf8'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Australia'
, '--publisher' , title
]
keep_only_tags = [dict(name='div', attrs={'id': 'story'})] keep_only_tags = [dict(name='div', attrs={'id': 'story'})]
#remove_tags = [dict(name=['object','link'])] #remove_tags = [dict(name=['object','link'])]
@ -67,6 +64,19 @@ class DailyTelegraph(BasicNewsRecipe):
(u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'), (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
(u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')] (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
if self.username and self.password:
br.open('http://www.theaustralian.com.au')
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
raw = br.submit().read()
if '>log out' not in raw.lower():
raise ValueError('Failed to log in to www.theaustralian.com.au'
' are your username and password correct?')
return br
def get_article_url(self, article): def get_article_url(self, article):
return article.id return article.id
@ -76,14 +86,4 @@ class DailyTelegraph(BasicNewsRecipe):
#return br.geturl() #return br.geturl()
def get_cover_url(self):
href = 'http://www.theaustralian.news.com.au/'
soup = self.index_to_soup(href)
img = soup.find('img',alt ="AUS HP promo digital2")
print img
if img :
cover_url = img['src']
return cover_url

View File

@ -1,37 +1,64 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
thescotsman.scotsman.com www.scotsman.com/the-scotsman
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class TheScotsman(BasicNewsRecipe): class TheScotsman(BasicNewsRecipe):
title = u'The Scotsman' title = 'The Scotsman'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'News from Scotland' description = 'News from Scotland'
oldest_article = 7 publisher = 'Johnston Publishing Ltd.'
category = 'news, politics, Scotland, UK'
oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = 'en_GB' language = 'en_GB'
encoding = 'utf-8'
simultaneous_downloads = 1 publication_type = 'newspaper'
remove_empty_feeds = True
keep_only_tags = [dict(name='div', attrs={'id':'viewarticle'})] masthead_url = 'http://www.scotsman.com/webimage/swts_thescotsman_image_e_7_25526!image/3142543874.png_gen/derivatives/default/3142543874.png'
remove_tags = [ extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'
dict(name='div' , attrs={'class':'viewarticlepanel' })
]
keep_only_tags = [dict(attrs={'class':'editorialSection'})]
remove_tags_after = dict(attrs={'class':'socialBookmarkPanel'})
remove_tags = [
dict(name=['meta','iframe','object','embed','link']),
dict(attrs={'class':['secondaryArticlesNav','socialBookmarkPanel']}),
dict(attrs={'id':'relatedArticles'})
]
remove_attributes = ['lang']
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
feeds = [ feeds = [
(u'Latest National News', u'http://thescotsman.scotsman.com/getFeed.aspx?Format=rss&sectionid=4068'), ('Latest News' , 'http://www.scotsman.com/cmlink/1.957140' ),
('UK', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7071&format=rss'), ('UK' , 'http://www.scotsman.com/cmlink/1.957142' ),
('Scotland', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7042&format=rss'), ('Scotland' , 'http://www.scotsman.com/cmlink/1.957141' ),
('International', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7000&format=rss'), ('International', 'http://www.scotsman.com/cmlink/1.957143' ),
('Politics', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6990&format=rss'), ('Politics' , 'http://www.scotsman.com/cmlink/1.957044' ),
('Entertainment', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7010&format=rss'), ('Arts' , 'http://www.scotsman.com/cmlink/1.1804825'),
('Features', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6996&format=rss'), ('Entertainment', 'http://www.scotsman.com/cmlink/1.957053' ),
('Opinion', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7074&format=rss'), ('Sports' , 'http://www.scotsman.com/cmlink/1.957151' ),
('Business' , 'http://www.scotsman.com/cmlink/1.957156' ),
('Features' , 'http://www.scotsman.com/cmlink/1.957149' ),
('Opinion' , 'http://www.scotsman.com/cmlink/1.957054' )
] ]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup

39
recipes/tovima.recipe Normal file
View File

@ -0,0 +1,39 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class Tovima(BasicNewsRecipe):
    """Greek news from tovima.gr."""

    title = 'To Vima'
    __author__ = 'Stelios'
    description = ' News from Greece'
    publisher = 'To Vima'
    category = 'news, GR'
    language = 'el'
    encoding = 'utf8'
    cover_url = 'http://www.tovima.gr/Themes/1/Default/Media/Home//small-n-short-logo.jpg'

    oldest_article = 3
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    extra_css = '''
        .article_title{font-family :Arial,Helvetica,sans-serif; font-weight: bold; font-size:large;}
        .article_text{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
'''

    # Keep the title and body containers; drop the category strip.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['article_title']}),
        dict(name='div', attrs={'class': ['article_text']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['article_cat']}),
    ]

    feeds = [
        (u'\u03C0\u03BF\u03BB\u03B9\u03C4\u03B9\u03BA\u03AE', 'http://www.tovima.gr/feed/politics/'),
        (u'\u03BF\u03B9\u03BA\u03BF\u03BD\u03BF\u03BC\u03AF\u03B1', 'http://www.tovima.gr/feed/finance/'),
        (u'\u03B3\u03BD\u03CE\u03BC\u03B5\u03C2', 'http://www.tovima.gr/feed/opinions/'),
        (u'blogs', 'http://www.tovima.gr/feed/blogs/'),
        (u'\u03BA\u03CC\u03C3\u03BC\u03BF\u03C2', 'http://www.tovima.gr/feed/world/'),
        (u'science', 'http://www.tovima.gr/feed/science/'),
        (u'\u03BA\u03BF\u03B9\u03BD\u03C9\u03BD\u03AF\u03B1', 'http://www.tovima.gr/feed/society/'),
        (u'\u03C0\u03BF\u03BB\u03B9\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://www.tovima.gr/feed/culture/'),
        (u'\u03B1\u03B8\u03BB\u03B7\u03C4\u03B9\u03C3\u03BC\u03CC\u03C2', 'http://www.tovima.gr/feed/sports/'),
    ]

View File

@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
class USAToday(BasicNewsRecipe): class USAToday(BasicNewsRecipe):
title = 'USA Today' title = 'USA Today'
__author__ = 'Kovid Goyal' __author__ = 'calibre'
oldest_article = 1 description = 'newspaper'
publication_type = 'newspaper' encoding = 'utf-8'
timefmt = '' publisher = 'usatoday.com'
max_articles_per_feed = 20 category = 'news, usa'
language = 'en' language = 'en'
no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \ use_embedded_content = False
.byline {font-family: monospace; \ timefmt = ' [%d %b %Y]'
text-align: left; \ max_articles_per_feed = 15
margin-bottom: 1em;}\n \ no_stylesheets = True
.image {text-align: center;}\n \ remove_empty_feeds = True
.caption {text-align: center; \ filterDuplicates = True
font-size: smaller; \
font-style: italic}\n \ extra_css = '''
.credit {text-align: right; \ h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
margin-bottom: 0em; \ #post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
font-size: smaller;}\n \ #post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
.articleBody {text-align: left;}\n ' '''
#simultaneous_downloads = 1
feeds = [ feeds = [
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'), ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'), ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe):
('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'), ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'), ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'), ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'), ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
] ]
keep_only_tags = [dict(attrs={'class':'story'})] keep_only_tags = [dict(attrs={'class':'story'})]
remove_tags = [ remove_tags = [
dict(attrs={'class':[ dict(attrs={'class':[
'share', 'share',
'reprints', 'reprints',
'inline-h3', 'inline-h3',
'info-extras', 'info-extras rounded',
'inset',
'ppy-outer', 'ppy-outer',
'ppy-caption', 'ppy-caption',
'comments', 'comments',
@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe):
'tags', 'tags',
'bottom-tools', 'bottom-tools',
'sponsoredlinks', 'sponsoredlinks',
'corrections'
]}), ]}),
dict(name='ul', attrs={'class':'inside-copy'}),
dict(id=['pluck']), dict(id=['pluck']),
] dict(id=['updated']),
dict(id=['post-date-updated'])
]
def get_masthead_url(self): def get_masthead_url(self):

68
recipes/zdnet.fr.recipe Normal file
View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
Fetch zdnet.fr
'''
from calibre.web.feeds.news import BasicNewsRecipe
class zdnet(BasicNewsRecipe):
    """French IT news from ZDNet.fr."""

    title = 'ZDNet.fr'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'ZDNet.fr'
    category = 'Actualité, Informatique, IT'
    language = 'fr'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
                h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                .contentmetadata p {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        '''

    # Strip sharing toolboxes, embedded frames and audio/social widgets.
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['toolbox']}),
        dict(name='div', attrs={'class': ['clear clearfix']}),
        dict(id='emailtoafriend'),
        dict(id='storyaudio'),
        dict(id='fbtwContainer'),
        dict(name='h5'),
    ]
    remove_tags_before = dict(id='leftcol')
    remove_tags_after = dict(id='content')

    feeds = [
        ('Informatique', 'http://www.zdnet.fr/feeds/rss/actualites/informatique/'),
        ('Internet', 'http://www.zdnet.fr/feeds/rss/actualites/internet/'),
        ('Telecom', 'http://www.zdnet.fr/feeds/rss/actualites/telecoms/'),
    ]

    def preprocess_html(self, soup):
        """Drop inline style attributes so extra_css takes effect."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """Return the site logo URL, or None if it cannot be fetched.

        Best-effort: any download failure just logs and falls back to None.
        """
        masthead = 'http://www.zdnet.fr/images/base/logo.png'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; narrow to Exception while keeping the best-effort
        # fallback behaviour.
        except Exception:
            self.log("\nCover unavailable")
            masthead = None
        return masthead

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python2
# -*- coding: utf-8 mode: python -*- # -*- coding: utf-8 mode: python -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
@ -123,6 +123,9 @@ class ZeitEPUBAbo(BasicNewsRecipe):
# new login process # new login process
response = browser.open(url) response = browser.open(url)
# Get rid of nested form
response.set_data(response.get_data().replace('<div><form action="/abo/zeit_digital?destination=node%2F94" accept-charset="UTF-8" method="post" id="user-login-form" class="zol_inlinelabel">', ''))
browser.set_response(response)
browser.select_form(nr=2) browser.select_form(nr=2)
browser.form['name']=self.username browser.form['name']=self.username
browser.form['pass']=self.password browser.form['pass']=self.password
@ -178,7 +181,11 @@ class ZeitEPUBAbo(BasicNewsRecipe):
browser = self.get_browser() browser = self.get_browser()
# new login process # new login process
browser.open(url) response=browser.open(url)
# Get rid of nested form
response.set_data(response.get_data().replace('<div><form action="/abo/zeit_digital?destination=node%2F94" accept-charset="UTF-8" method="post" id="user-login-form" class="zol_inlinelabel">', ''))
browser.set_response(response)
browser.select_form(nr=2) browser.select_form(nr=2)
browser.form['name']=self.username browser.form['name']=self.username
browser.form['pass']=self.password browser.form['pass']=self.password
@ -211,4 +218,3 @@ class ZeitEPUBAbo(BasicNewsRecipe):
self.log.warning('Using static old low-res cover') self.log.warning('Using static old low-res cover')
cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg' cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
return cover_url return cover_url

12
recipes/zougla.recipe Normal file
View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1320264153(BasicNewsRecipe):
    """Greek news from zougla.gr, relying on calibre's automatic cleanup."""

    title = u'zougla'
    __author__ = 'Stelios'
    language = 'el'

    oldest_article = 7
    max_articles_per_feed = 100
    # Heuristic extraction instead of hand-written keep/remove tag lists.
    auto_cleanup = True

    feeds = [
        (u'zougla', u'http://www.zougla.gr/ArticleRss.xml'),
    ]

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex import sys, os, textwrap, subprocess, shutil, tempfile, atexit, shlex
from setup import (Command, islinux, isbsd, basenames, modules, functions, from setup import (Command, islinux, isbsd, basenames, modules, functions,
__appname__, __version__) __appname__, __version__)
@ -168,10 +168,6 @@ class Develop(Command):
ret = p.wait() ret = p.wait()
if ret != 0: if ret != 0:
return warn() return warn()
if not isbsd:
os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
self.manifest.append(dest) self.manifest.append(dest)
return dest return dest

View File

@ -138,15 +138,10 @@ class LinuxFreeze(Command):
def compile_mount_helper(self): def compile_mount_helper(self):
self.info('Compiling mount helper...') self.info('Compiling mount helper...')
self.regain_privileges()
dest = self.j(self.bin_dir, 'calibre-mount-helper') dest = self.j(self.bin_dir, 'calibre-mount-helper')
subprocess.check_call(['gcc', '-Wall', '-pedantic', subprocess.check_call(['gcc', '-Wall', '-pedantic',
self.j(self.SRC, 'calibre', 'devices', self.j(self.SRC, 'calibre', 'devices',
'linux_mount_helper.c'), '-o', dest]) 'linux_mount_helper.c'), '-o', dest])
os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH|stat.S_IRGRP|stat.S_IROTH)
self.drop_privileges()
def copy_python(self): def copy_python(self):
self.info('Copying python...') self.info('Copying python...')

View File

@ -7,15 +7,15 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: calibre\n" "Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n" "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-09-02 16:21+0000\n" "POT-Creation-Date: 2011-09-27 14:31+0000\n"
"PO-Revision-Date: 2011-09-21 13:48+0000\n" "PO-Revision-Date: 2011-10-22 22:04+0000\n"
"Last-Translator: Jellby <Unknown>\n" "Last-Translator: Fitoschido <fitoschido@gmail.com>\n"
"Language-Team: Spanish <es@li.org>\n" "Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-09-22 04:47+0000\n" "X-Launchpad-Export-Date: 2011-10-23 05:13+0000\n"
"X-Generator: Launchpad (build 13996)\n" "X-Generator: Launchpad (build 14170)\n"
#. name for aaa #. name for aaa
msgid "Ghotuo" msgid "Ghotuo"
@ -5911,7 +5911,7 @@ msgstr "Gwahatike"
#. name for dai #. name for dai
msgid "Day" msgid "Day"
msgstr "Day" msgstr "Día"
#. name for daj #. name for daj
msgid "Daju; Dar Fur" msgid "Daju; Dar Fur"
@ -18231,7 +18231,7 @@ msgstr ""
#. name for nhi #. name for nhi
msgid "Nahuatl; Zacatlán-Ahuacatlán-Tepetzintla" msgid "Nahuatl; Zacatlán-Ahuacatlán-Tepetzintla"
msgstr "Náhuatl de Zacatlán; Ahuacatlán y Tepetzintla" msgstr "Náhuatl de Zacatlán-Ahuacatlán-Tepetzintla"
#. name for nhk #. name for nhk
msgid "Nahuatl; Isthmus-Cosoleacaque" msgid "Nahuatl; Isthmus-Cosoleacaque"

View File

@ -9,49 +9,49 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-09-27 14:31+0000\n" "POT-Creation-Date: 2011-09-27 14:31+0000\n"
"PO-Revision-Date: 2011-09-27 18:23+0000\n" "PO-Revision-Date: 2011-10-15 17:29+0000\n"
"Last-Translator: Kovid Goyal <Unknown>\n" "Last-Translator: Devilinside <Unknown>\n"
"Language-Team: Hungarian <debian-l10n-hungarian@lists.d.o>\n" "Language-Team: Hungarian <debian-l10n-hungarian@lists.d.o>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-09-28 04:50+0000\n" "X-Launchpad-Export-Date: 2011-10-16 05:14+0000\n"
"X-Generator: Launchpad (build 14049)\n" "X-Generator: Launchpad (build 14124)\n"
"X-Poedit-Country: HUNGARY\n" "X-Poedit-Country: HUNGARY\n"
"Language: hu\n" "Language: hu\n"
"X-Poedit-Language: Hungarian\n" "X-Poedit-Language: Hungarian\n"
#. name for aaa #. name for aaa
msgid "Ghotuo" msgid "Ghotuo"
msgstr "" msgstr "Ghotuo"
#. name for aab #. name for aab
msgid "Alumu-Tesu" msgid "Alumu-Tesu"
msgstr "" msgstr "Alumu-Tesu"
#. name for aac #. name for aac
msgid "Ari" msgid "Ari"
msgstr "" msgstr "Ari"
#. name for aad #. name for aad
msgid "Amal" msgid "Amal"
msgstr "" msgstr "Amal"
#. name for aae #. name for aae
msgid "Albanian; Arbëreshë" msgid "Albanian; Arbëreshë"
msgstr "" msgstr "Albán; Arbëreshë"
#. name for aaf #. name for aaf
msgid "Aranadan" msgid "Aranadan"
msgstr "" msgstr "Aranadan"
#. name for aag #. name for aag
msgid "Ambrak" msgid "Ambrak"
msgstr "" msgstr "Ambrak"
#. name for aah #. name for aah
msgid "Arapesh; Abu'" msgid "Arapesh; Abu'"
msgstr "" msgstr "Arapesh; Abu'"
#. name for aai #. name for aai
msgid "Arifama-Miniafia" msgid "Arifama-Miniafia"
@ -75,7 +75,7 @@ msgstr ""
#. name for aao #. name for aao
msgid "Arabic; Algerian Saharan" msgid "Arabic; Algerian Saharan"
msgstr "" msgstr "Arab; Algériai Szaharai"
#. name for aap #. name for aap
msgid "Arára; Pará" msgid "Arára; Pará"
@ -87,7 +87,7 @@ msgstr ""
#. name for aar #. name for aar
msgid "Afar" msgid "Afar"
msgstr "afar" msgstr "Afar"
#. name for aas #. name for aas
msgid "Aasáx" msgid "Aasáx"
@ -498,10 +498,9 @@ msgstr ""
msgid "Tapei" msgid "Tapei"
msgstr "" msgstr ""
# src/trans.h:281 src/trans.h:318
#. name for afr #. name for afr
msgid "Afrikaans" msgid "Afrikaans"
msgstr "afrikaans" msgstr "Afrikaans"
#. name for afs #. name for afs
msgid "Creole; Afro-Seminole" msgid "Creole; Afro-Seminole"
@ -801,7 +800,7 @@ msgstr ""
#. name for aka #. name for aka
msgid "Akan" msgid "Akan"
msgstr "akan" msgstr "Akan"
#. name for akb #. name for akb
msgid "Batak Angkola" msgid "Batak Angkola"
@ -1015,10 +1014,9 @@ msgstr ""
msgid "Amarag" msgid "Amarag"
msgstr "" msgstr ""
# src/trans.h:283
#. name for amh #. name for amh
msgid "Amharic" msgid "Amharic"
msgstr "amhara" msgstr "Amhara"
#. name for ami #. name for ami
msgid "Amis" msgid "Amis"
@ -1425,10 +1423,9 @@ msgstr ""
msgid "Arrarnta; Western" msgid "Arrarnta; Western"
msgstr "" msgstr ""
# src/trans.h:294
#. name for arg #. name for arg
msgid "Aragonese" msgid "Aragonese"
msgstr "aragóniai" msgstr "Aragóniai"
#. name for arh #. name for arh
msgid "Arhuaco" msgid "Arhuaco"
@ -1548,7 +1545,7 @@ msgstr ""
#. name for asm #. name for asm
msgid "Assamese" msgid "Assamese"
msgstr "asszámi" msgstr "Asszámi"
#. name for asn #. name for asn
msgid "Asuriní; Xingú" msgid "Asuriní; Xingú"
@ -1790,10 +1787,9 @@ msgstr ""
msgid "Arabic; Uzbeki" msgid "Arabic; Uzbeki"
msgstr "" msgstr ""
# src/trans.h:283
#. name for ava #. name for ava
msgid "Avaric" msgid "Avaric"
msgstr "avar" msgstr "Avar"
#. name for avb #. name for avb
msgid "Avau" msgid "Avau"
@ -1805,7 +1801,7 @@ msgstr ""
#. name for ave #. name for ave
msgid "Avestan" msgid "Avestan"
msgstr "aveszti" msgstr "Avesztai"
#. name for avi #. name for avi
msgid "Avikam" msgid "Avikam"
@ -1945,7 +1941,7 @@ msgstr ""
#. name for ayc #. name for ayc
msgid "Aymara; Southern" msgid "Aymara; Southern"
msgstr "" msgstr "Ajmara; Déli"
#. name for ayd #. name for ayd
msgid "Ayabadhu" msgid "Ayabadhu"
@ -1977,7 +1973,7 @@ msgstr ""
#. name for aym #. name for aym
msgid "Aymara" msgid "Aymara"
msgstr "aymara" msgstr "Ajmara"
#. name for ayn #. name for ayn
msgid "Arabic; Sanaani" msgid "Arabic; Sanaani"
@ -1997,7 +1993,7 @@ msgstr ""
#. name for ayr #. name for ayr
msgid "Aymara; Central" msgid "Aymara; Central"
msgstr "" msgstr "Ajmara; Közép"
#. name for ays #. name for ays
msgid "Ayta; Sorsogon" msgid "Ayta; Sorsogon"
@ -2025,12 +2021,11 @@ msgstr ""
#. name for azb #. name for azb
msgid "Azerbaijani; South" msgid "Azerbaijani; South"
msgstr "" msgstr "Azeri; Déli"
# src/trans.h:311
#. name for aze #. name for aze
msgid "Azerbaijani" msgid "Azerbaijani"
msgstr "azeri" msgstr "Azeri"
#. name for azg #. name for azg
msgid "Amuzgo; San Pedro Amuzgos" msgid "Amuzgo; San Pedro Amuzgos"
@ -2038,7 +2033,7 @@ msgstr ""
#. name for azj #. name for azj
msgid "Azerbaijani; North" msgid "Azerbaijani; North"
msgstr "" msgstr "Azeri; Északi"
#. name for azm #. name for azm
msgid "Amuzgo; Ipalapa" msgid "Amuzgo; Ipalapa"
@ -2090,7 +2085,7 @@ msgstr ""
#. name for bak #. name for bak
msgid "Bashkir" msgid "Bashkir"
msgstr "baskír" msgstr "Baskír"
#. name for bal #. name for bal
msgid "Baluchi" msgid "Baluchi"
@ -2115,7 +2110,7 @@ msgstr ""
#. name for bar #. name for bar
msgid "Bavarian" msgid "Bavarian"
msgstr "" msgstr "Bajor"
#. name for bas #. name for bas
msgid "Basa (Cameroon)" msgid "Basa (Cameroon)"
@ -2497,10 +2492,9 @@ msgstr "beja"
msgid "Bebeli" msgid "Bebeli"
msgstr "" msgstr ""
# src/trans.h:286
#. name for bel #. name for bel
msgid "Belarusian" msgid "Belarusian"
msgstr "belorusz" msgstr "Belarusz"
#. name for bem #. name for bem
msgid "Bemba (Zambia)" msgid "Bemba (Zambia)"
@ -2508,7 +2502,7 @@ msgstr ""
#. name for ben #. name for ben
msgid "Bengali" msgid "Bengali"
msgstr "bengáli" msgstr "Bengáli"
#. name for beo #. name for beo
msgid "Beami" msgid "Beami"
@ -3510,10 +3504,9 @@ msgstr ""
msgid "Borôro" msgid "Borôro"
msgstr "" msgstr ""
# src/trans.h:309
#. name for bos #. name for bos
msgid "Bosnian" msgid "Bosnian"
msgstr "bosnyák" msgstr "Bosnyák"
#. name for bot #. name for bot
msgid "Bongo" msgid "Bongo"
@ -3685,7 +3678,7 @@ msgstr ""
#. name for bqn #. name for bqn
msgid "Bulgarian Sign Language" msgid "Bulgarian Sign Language"
msgstr "" msgstr "Bolgár jelnyelv"
#. name for bqo #. name for bqo
msgid "Balo" msgid "Balo"
@ -4078,10 +4071,9 @@ msgstr ""
msgid "Bugawac" msgid "Bugawac"
msgstr "" msgstr ""
# src/trans.h:285
#. name for bul #. name for bul
msgid "Bulgarian" msgid "Bulgarian"
msgstr "bolgár" msgstr "Bolgár"
#. name for bum #. name for bum
msgid "Bulu (Cameroon)" msgid "Bulu (Cameroon)"
@ -7445,10 +7437,9 @@ msgstr ""
msgid "Semimi" msgid "Semimi"
msgstr "" msgstr ""
# src/trans.h:284
#. name for eus #. name for eus
msgid "Basque" msgid "Basque"
msgstr "baszk" msgstr "Baszk"
#. name for eve #. name for eve
msgid "Even" msgid "Even"
@ -7534,10 +7525,9 @@ msgstr ""
msgid "Fang (Equatorial Guinea)" msgid "Fang (Equatorial Guinea)"
msgstr "" msgstr ""
# src/trans.h:294
#. name for fao #. name for fao
msgid "Faroese" msgid "Faroese"
msgstr "feröi" msgstr "Feröeri"
#. name for fap #. name for fap
msgid "Palor" msgid "Palor"
@ -29414,7 +29404,7 @@ msgstr ""
#. name for xzp #. name for xzp
msgid "Zapotec; Ancient" msgid "Zapotec; Ancient"
msgstr "" msgstr "Zapoték; Ősi"
#. name for yaa #. name for yaa
msgid "Yaminahua" msgid "Yaminahua"
@ -30326,27 +30316,27 @@ msgstr ""
#. name for zaa #. name for zaa
msgid "Zapotec; Sierra de Juárez" msgid "Zapotec; Sierra de Juárez"
msgstr "" msgstr "Zapoték; Sierra de Juárezi"
#. name for zab #. name for zab
msgid "Zapotec; San Juan Guelavía" msgid "Zapotec; San Juan Guelavía"
msgstr "" msgstr "Zapoték; San Juan Guelavíai"
#. name for zac #. name for zac
msgid "Zapotec; Ocotlán" msgid "Zapotec; Ocotlán"
msgstr "" msgstr "Zapoték; Ocotláni"
#. name for zad #. name for zad
msgid "Zapotec; Cajonos" msgid "Zapotec; Cajonos"
msgstr "zapoték; Cajonos" msgstr "Zapoték; Cajonesi"
#. name for zae #. name for zae
msgid "Zapotec; Yareni" msgid "Zapotec; Yareni"
msgstr "zapoték; Yareni" msgstr "Zapoték; Yareni"
#. name for zaf #. name for zaf
msgid "Zapotec; Ayoquesco" msgid "Zapotec; Ayoquesco"
msgstr "" msgstr "Zapoték; Ayoquescoi"
#. name for zag #. name for zag
msgid "Zaghawa" msgid "Zaghawa"
@ -30358,7 +30348,7 @@ msgstr "zangval"
#. name for zai #. name for zai
msgid "Zapotec; Isthmus" msgid "Zapotec; Isthmus"
msgstr "zapoték; Isthmus" msgstr "Zapoték; Isthmusi"
#. name for zaj #. name for zaj
msgid "Zaramo" msgid "Zaramo"
@ -30374,31 +30364,31 @@ msgstr "zozu"
#. name for zam #. name for zam
msgid "Zapotec; Miahuatlán" msgid "Zapotec; Miahuatlán"
msgstr "" msgstr "Zapoték; Miahuatláni"
#. name for zao #. name for zao
msgid "Zapotec; Ozolotepec" msgid "Zapotec; Ozolotepec"
msgstr "" msgstr "Zapoték; Ozolotepeci"
#. name for zap #. name for zap
msgid "Zapotec" msgid "Zapotec"
msgstr "zapoték" msgstr "Zapoték"
#. name for zaq #. name for zaq
msgid "Zapotec; Aloápam" msgid "Zapotec; Aloápam"
msgstr "" msgstr "Zapoték; Aloápami"
#. name for zar #. name for zar
msgid "Zapotec; Rincón" msgid "Zapotec; Rincón"
msgstr "zapoték; Rincón" msgstr "Zapoték; Rincóni"
#. name for zas #. name for zas
msgid "Zapotec; Santo Domingo Albarradas" msgid "Zapotec; Santo Domingo Albarradas"
msgstr "" msgstr "Zapoték; Santo Domingo Albarradasi"
#. name for zat #. name for zat
msgid "Zapotec; Tabaa" msgid "Zapotec; Tabaa"
msgstr "zapoték; Tabaa" msgstr "Zapoték; Tabaa-i"
# src/trans.h:193 # src/trans.h:193
#. name for zau #. name for zau
@ -30407,15 +30397,15 @@ msgstr "zangskari"
#. name for zav #. name for zav
msgid "Zapotec; Yatzachi" msgid "Zapotec; Yatzachi"
msgstr "" msgstr "Zapoték; Yatzachi-i"
#. name for zaw #. name for zaw
msgid "Zapotec; Mitla" msgid "Zapotec; Mitla"
msgstr "zapoték; Mitla" msgstr "Zapoték; Mitlai"
#. name for zax #. name for zax
msgid "Zapotec; Xadani" msgid "Zapotec; Xadani"
msgstr "zapoték; Xadani" msgstr "Zapoték; Xadani-i"
#. name for zay #. name for zay
msgid "Zayse-Zergulla" msgid "Zayse-Zergulla"
@ -30991,7 +30981,7 @@ msgstr "tokano"
#. name for zul #. name for zul
msgid "Zulu" msgid "Zulu"
msgstr "zulu" msgstr "Zulu"
# src/trans.h:316 # src/trans.h:316
#. name for zum #. name for zum

View File

@ -10,14 +10,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-09-27 14:31+0000\n" "POT-Creation-Date: 2011-09-27 14:31+0000\n"
"PO-Revision-Date: 2011-09-27 18:36+0000\n" "PO-Revision-Date: 2011-10-25 19:06+0000\n"
"Last-Translator: Kovid Goyal <Unknown>\n" "Last-Translator: zeugma <Unknown>\n"
"Language-Team: Turkish <gnome-turk@gnome.org>\n" "Language-Team: Turkish <gnome-turk@gnome.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-09-28 05:12+0000\n" "X-Launchpad-Export-Date: 2011-10-26 05:13+0000\n"
"X-Generator: Launchpad (build 14049)\n" "X-Generator: Launchpad (build 14189)\n"
"Language: tr\n" "Language: tr\n"
#. name for aaa #. name for aaa
@ -54,7 +54,7 @@ msgstr ""
#. name for aai #. name for aai
msgid "Arifama-Miniafia" msgid "Arifama-Miniafia"
msgstr "" msgstr "Arifama-Miniafia"
#. name for aak #. name for aak
msgid "Ankave" msgid "Ankave"
@ -122,7 +122,7 @@ msgstr "Bankon"
#. name for abc #. name for abc
msgid "Ayta; Ambala" msgid "Ayta; Ambala"
msgstr "" msgstr "Ayta; Ambala"
#. name for abd #. name for abd
msgid "Manide" msgid "Manide"
@ -130,11 +130,11 @@ msgstr "Manide"
#. name for abe #. name for abe
msgid "Abnaki; Western" msgid "Abnaki; Western"
msgstr "" msgstr "Abnaki; Western"
#. name for abf #. name for abf
msgid "Abai Sungai" msgid "Abai Sungai"
msgstr "" msgstr "Abai Sungai"
#. name for abg #. name for abg
msgid "Abaga" msgid "Abaga"
@ -146,7 +146,7 @@ msgstr "Arapça; Tacikçe"
#. name for abi #. name for abi
msgid "Abidji" msgid "Abidji"
msgstr "" msgstr "Abidji"
#. name for abj #. name for abj
msgid "Aka-Bea" msgid "Aka-Bea"
@ -158,7 +158,7 @@ msgstr "Abhazca"
#. name for abl #. name for abl
msgid "Lampung Nyo" msgid "Lampung Nyo"
msgstr "" msgstr "Lampung Nyo"
#. name for abm #. name for abm
msgid "Abanyom" msgid "Abanyom"
@ -282,7 +282,7 @@ msgstr "Achterhoeks"
#. name for acu #. name for acu
msgid "Achuar-Shiwiar" msgid "Achuar-Shiwiar"
msgstr "" msgstr "Achuar-Shiwiar"
#. name for acv #. name for acv
msgid "Achumawi" msgid "Achumawi"

View File

@ -206,7 +206,7 @@ class Resources(Command):
function_dict = {} function_dict = {}
import inspect import inspect
from calibre.utils.formatter_functions import formatter_functions from calibre.utils.formatter_functions import formatter_functions
for obj in formatter_functions.get_builtins().values(): for obj in formatter_functions().get_builtins().values():
eval_func = inspect.getmembers(obj, eval_func = inspect.getmembers(obj,
lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate') lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
try: try:

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 22) numeric_version = (0, 8, 24)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -1251,6 +1251,17 @@ class StoreEbookNLStore(StoreBase):
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'PDF']
affiliate = False affiliate = False
class StoreEbookpointStore(StoreBase):
name = 'Ebookpoint'
author = u'Tomasz Długosz'
description = u'Ebooki wolne od DRM, 3 formaty w pakiecie, wysyłanie na Kindle'
actual_plugin = 'calibre.gui2.store.stores.ebookpoint_plugin:EbookpointStore'
drm_free_only = True
headquarters = 'PL'
formats = ['EPUB', 'MOBI', 'PDF']
affiliate = True
class StoreEbookscomStore(StoreBase): class StoreEbookscomStore(StoreBase):
name = 'eBooks.com' name = 'eBooks.com'
description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.' description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
@ -1465,7 +1476,7 @@ class StoreVirtualoStore(StoreBase):
actual_plugin = 'calibre.gui2.store.stores.virtualo_plugin:VirtualoStore' actual_plugin = 'calibre.gui2.store.stores.virtualo_plugin:VirtualoStore'
headquarters = 'PL' headquarters = 'PL'
formats = ['EPUB', 'PDF'] formats = ['EPUB', 'MOBI', 'PDF']
class StoreWaterstonesUKStore(StoreBase): class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK' name = 'Waterstones UK'
@ -1543,6 +1554,7 @@ plugins += [
StoreChitankaStore, StoreChitankaStore,
StoreDieselEbooksStore, StoreDieselEbooksStore,
StoreEbookNLStore, StoreEbookNLStore,
StoreEbookpointStore,
StoreEbookscomStore, StoreEbookscomStore,
StoreEBookShoppeUKStore, StoreEBookShoppeUKStore,
StoreEHarlequinStore, StoreEHarlequinStore,

View File

@ -341,7 +341,9 @@ class DB(object):
if f['is_custom']] if f['is_custom']]
for f in fmvals: for f in fmvals:
self.create_custom_column(f['label'], f['name'], self.create_custom_column(f['label'], f['name'],
f['datatype'], f['is_multiple'] is not None, f['datatype'],
(f['is_multiple'] is not None and
len(f['is_multiple']) > 0),
f['is_editable'], f['display']) f['is_editable'], f['display'])
defs = self.prefs.defaults defs = self.prefs.defaults

View File

@ -36,7 +36,8 @@ class ANDROID(USBMS):
0xca2 : [0x100, 0x0227, 0x0226, 0x222], 0xca2 : [0x100, 0x0227, 0x0226, 0x222],
0xca3 : [0x100, 0x0227, 0x0226, 0x222], 0xca3 : [0x100, 0x0227, 0x0226, 0x222],
0xca4 : [0x100, 0x0227, 0x0226, 0x222], 0xca4 : [0x100, 0x0227, 0x0226, 0x222],
0xca9 : [0x100, 0x0227, 0x0226, 0x222] 0xca9 : [0x100, 0x0227, 0x0226, 0x222],
0xcac : [0x100, 0x0227, 0x0226, 0x222],
}, },
# Eken # Eken
@ -77,7 +78,7 @@ class ANDROID(USBMS):
# Samsung # Samsung
0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400], 0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
0x681c : [0x0222, 0x0224, 0x0400], 0x681c : [0x0222, 0x0223, 0x0224, 0x0400],
0x6640 : [0x0100], 0x6640 : [0x0100],
0x685b : [0x0400], 0x685b : [0x0400],
0x685e : [0x0400], 0x685e : [0x0400],
@ -138,8 +139,12 @@ class ANDROID(USBMS):
# Advent # Advent
0x0955 : { 0x7100 : [0x9999] }, # This is the same as the Notion Ink Adam 0x0955 : { 0x7100 : [0x9999] }, # This is the same as the Notion Ink Adam
# Kobo
0x2237: { 0x2208 : [0x0226] },
} }
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books'] EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books',
'sdcard/ebooks']
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to ' EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
'send e-books to on the device. The first one that exists will ' 'send e-books to on the device. The first one that exists will '
'be used') 'be used')
@ -149,7 +154,7 @@ class ANDROID(USBMS):
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA', 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL'] 'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -160,7 +165,8 @@ class ANDROID(USBMS):
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', 'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI'] 'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
'UMS', '.K080']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -62,7 +62,7 @@ class DevicePlugin(Plugin):
#: Icon for this device #: Icon for this device
icon = I('reader.png') icon = I('reader.png')
# Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations() # Encapsulates an annotation fetched from the device
UserAnnotation = namedtuple('Annotation','type, value') UserAnnotation = namedtuple('Annotation','type, value')
#: GUI displays this as a message if not None. Useful if opening can take a #: GUI displays this as a message if not None. Useful if opening can take a

View File

@ -13,6 +13,8 @@ import datetime, os, re, sys, json, hashlib
from calibre.devices.kindle.apnx import APNXBuilder from calibre.devices.kindle.apnx import APNXBuilder
from calibre.devices.kindle.bookmark import Bookmark from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
from calibre.ebooks.metadata import MetaInformation
from calibre import strftime
''' '''
Notes on collections: Notes on collections:
@ -164,6 +166,121 @@ class KINDLE(USBMS):
# This returns as job.result in gui2.ui.annotations_fetched(self,job) # This returns as job.result in gui2.ui.annotations_fetched(self,job)
return bookmarked_books return bookmarked_books
def generate_annotation_html(self, bookmark):
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
# Returns <div class="user_annotations"> ... </div>
last_read_location = bookmark.last_read_location
timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
percent_read = bookmark.percent_read
ka_soup = BeautifulSoup()
dtc = 0
divTag = Tag(ka_soup,'div')
divTag['class'] = 'user_annotations'
# Add the last-read location
spanTag = Tag(ka_soup, 'span')
spanTag['style'] = 'font-weight:bold'
if bookmark.book_format == 'pdf':
spanTag.insert(0,NavigableString(
_("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)") % \
dict(time=strftime(u'%x', timestamp.timetuple()),
loc=last_read_location,
pr=percent_read)))
else:
spanTag.insert(0,NavigableString(
_("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)") % \
dict(time=strftime(u'%x', timestamp.timetuple()),
loc=last_read_location,
pr=percent_read)))
divTag.insert(dtc, spanTag)
dtc += 1
divTag.insert(dtc, Tag(ka_soup,'br'))
dtc += 1
if bookmark.user_notes:
user_notes = bookmark.user_notes
annotations = []
# Add the annotations sorted by location
# Italicize highlighted text
for location in sorted(user_notes):
if user_notes[location]['text']:
annotations.append(
_('<b>Location %(dl)d &bull; %(typ)s</b><br />%(text)s<br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type'],
text=(user_notes[location]['text'] if \
user_notes[location]['type'] == 'Note' else \
'<i>%s</i>' % user_notes[location]['text'])))
else:
if bookmark.book_format == 'pdf':
annotations.append(
_('<b>Page %(dl)d &bull; %(typ)s</b><br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type']))
else:
annotations.append(
_('<b>Location %(dl)d &bull; %(typ)s</b><br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type']))
for annotation in annotations:
divTag.insert(dtc, annotation)
dtc += 1
ka_soup.insert(0,divTag)
return ka_soup
def add_annotation_to_library(self, db, db_id, annotation):
from calibre.ebooks.BeautifulSoup import Tag
bm = annotation
ignore_tags = set(['Catalog', 'Clippings'])
if bm.type == 'kindle_bookmark':
mi = db.get_metadata(db_id, index_is_id=True)
user_notes_soup = self.generate_annotation_html(bm.value)
if mi.comments:
a_offset = mi.comments.find('<div class="user_annotations">')
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
if a_offset >= 0:
mi.comments = mi.comments[:a_offset]
if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset]
if set(mi.tags).intersection(ignore_tags):
return
if mi.comments:
hrTag = Tag(user_notes_soup,'hr')
hrTag['class'] = 'annotations_divider'
user_notes_soup.insert(0, hrTag)
mi.comments += unicode(user_notes_soup.prettify())
else:
mi.comments = unicode(user_notes_soup.prettify())
# Update library comments
db.set_comment(db_id, mi.comments)
# Add bookmark file to db_id
db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
bm.value.path, index_is_id=True)
elif bm.type == 'kindle_clippings':
# Find 'My Clippings' author=Kindle in database, or add
last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
mc_id = list(db.data.search_getting_ids('title:"My Clippings"', ''))
if mc_id:
db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'],
index_is_id=True)
mi = db.get_metadata(mc_id[0], index_is_id=True)
mi.comments = last_update
db.set_metadata(mc_id[0], mi)
else:
mi = MetaInformation('My Clippings', authors = ['Kindle'])
mi.tags = ['Clippings']
mi.comments = last_update
db.add_books([bm.value['path']], ['txt'], [mi])
class KINDLE2(KINDLE): class KINDLE2(KINDLE):

View File

@ -0,0 +1,112 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2011, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from contextlib import closing
import sqlite3 as sqlite
class Bookmark(): # {{{
'''
A simple class fetching bookmark data
kobo-specific
'''
def __init__(self, db_path, contentid, path, id, book_format, bookmark_extension):
self.book_format = book_format
self.bookmark_extension = bookmark_extension
self.book_length = 0 # Not Used
self.id = id
self.last_read = 0
self.last_read_location = 0 # Not Used
self.path = path
self.timestamp = 0
self.user_notes = None
self.db_path = db_path
self.contentid = contentid
self.percent_read = 0
self.get_bookmark_data()
self.get_book_length() # Not Used
def get_bookmark_data(self):
''' Return the timestamp and last_read_location '''
user_notes = {}
self.timestamp = os.path.getmtime(self.path)
with closing(sqlite.connect(self.db_path)) as connection:
# return bytestrings if the content cannot the decoded as unicode
connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
cursor = connection.cursor()
t = (self.contentid,)
cursor.execute('select bm.bookmarkid, bm.contentid, bm.volumeid, '
'bm.text, bm.annotation, bm.ChapterProgress, '
'bm.StartContainerChildIndex, bm.StartOffset, c.BookTitle, '
'c.TITLE, c.volumeIndex, c.___NumPages '
'from Bookmark bm inner join Content c on '
'bm.contentid = c.contentid and '
'bm.volumeid = ? order by bm.volumeid, bm.chapterprogress', t)
previous_chapter = 0
bm_count = 0
for row in cursor:
current_chapter = row[10]
if previous_chapter == current_chapter:
bm_count = bm_count + 1
else:
bm_count = 0
text = row[3]
annotation = row[4]
# A dog ear (bent upper right corner) is a bookmark
if row[6] == row[7] == 0: # StartContainerChildIndex = StartOffset = 0
e_type = 'Bookmark'
text = row[9]
# highlight is text with no annotation
elif text is not None and (annotation is None or annotation == ""):
e_type = 'Highlight'
elif text and annotation:
e_type = 'Annotation'
else:
e_type = 'Unknown annotation type'
note_id = row[10] + bm_count
chapter_title = row[9]
# book_title = row[8]
chapter_progress = min(round(float(100*row[5]),2),100)
user_notes[note_id] = dict(id=self.id,
displayed_location=note_id,
type=e_type,
text=text,
annotation=annotation,
chapter=row[10],
chapter_title=chapter_title,
chapter_progress=chapter_progress)
previous_chapter = row[10]
# debug_print("e_type:" , e_type, '\t', 'loc: ', note_id, 'text: ', text,
# 'annotation: ', annotation, 'chapter_title: ', chapter_title,
# 'chapter_progress: ', chapter_progress, 'date: ')
cursor.execute('select datelastread, ___PercentRead from content '
'where bookid is Null and '
'contentid = ?', t)
for row in cursor:
self.last_read = row[0]
self.percent_read = row[1]
# print row[1]
cursor.close()
# self.last_read_location = self.last_read - self.pdf_page_offset
self.user_notes = user_notes
def get_book_length(self):
#TL self.book_length = 0
#TL self.book_length = int(unpack('>I', record0[0x04:0x08])[0])
pass
# }}}

View File

@ -2,20 +2,22 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Timothy Legge <timlegge at gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, time, calendar
import sqlite3 as sqlite import sqlite3 as sqlite
from contextlib import closing from contextlib import closing
from calibre.devices.usbms.books import BookList from calibre.devices.usbms.books import BookList
from calibre.devices.kobo.books import Book from calibre.devices.kobo.books import Book
from calibre.devices.kobo.books import ImageWrapper from calibre.devices.kobo.books import ImageWrapper
from calibre.devices.kobo.bookmark import Bookmark
from calibre.devices.mime import mime_type_ext from calibre.devices.mime import mime_type_ext
from calibre.devices.usbms.driver import USBMS, debug_print from calibre.devices.usbms.driver import USBMS, debug_print
from calibre import prints from calibre import prints
from calibre.devices.usbms.books import CollectionsBookList from calibre.devices.usbms.books import CollectionsBookList
from calibre.utils.magick.draw import save_cover_data_to from calibre.utils.magick.draw import save_cover_data_to
from calibre.ptempfile import PersistentTemporaryFile
class KOBO(USBMS): class KOBO(USBMS):
@ -23,7 +25,7 @@ class KOBO(USBMS):
gui_name = 'Kobo Reader' gui_name = 'Kobo Reader'
description = _('Communicate with the Kobo Reader') description = _('Communicate with the Kobo Reader')
author = 'Timothy Legge' author = 'Timothy Legge'
version = (1, 0, 10) version = (1, 0, 11)
dbversion = 0 dbversion = 0
fwversion = 0 fwversion = 0
@ -46,6 +48,7 @@ class KOBO(USBMS):
EBOOK_DIR_MAIN = '' EBOOK_DIR_MAIN = ''
SUPPORTS_SUB_DIRS = True SUPPORTS_SUB_DIRS = True
SUPPORTS_ANNOTATIONS = True
VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo']) VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo'])
@ -58,18 +61,25 @@ class KOBO(USBMS):
' ebook file itself. With this option, calibre will send a ' ' ebook file itself. With this option, calibre will send a '
'separate cover image to the reader, useful if you ' 'separate cover image to the reader, useful if you '
'have modified the cover.'), 'have modified the cover.'),
_('Upload Black and White Covers') _('Upload Black and White Covers'),
_('Show expired books') +
':::'+_('A bug in an earlier version left non kepubs book records'
' in the datbase. With this option Calibre will show the '
'expired records and allow you to delete them with '
'the new delete logic.'),
] ]
EXTRA_CUSTOMIZATION_DEFAULT = [ EXTRA_CUSTOMIZATION_DEFAULT = [
', '.join(['tags']), ', '.join(['tags']),
True, True,
True,
True True
] ]
OPT_COLLECTIONS = 0 OPT_COLLECTIONS = 0
OPT_UPLOAD_COVERS = 1 OPT_UPLOAD_COVERS = 1
OPT_UPLOAD_GRAYSCALE_COVERS = 2 OPT_UPLOAD_GRAYSCALE_COVERS = 2
OPT_SHOW_EXPIRED_BOOK_RECORDS = 3
def initialize(self): def initialize(self):
USBMS.initialize(self) USBMS.initialize(self)
@ -105,6 +115,7 @@ class KOBO(USBMS):
if self.fwversion != '1.0' and self.fwversion != '1.4': if self.fwversion != '1.0' and self.fwversion != '1.4':
self.has_kepubs = True self.has_kepubs = True
debug_print('Version of driver: ', self.version, 'Has kepubs:', self.has_kepubs)
debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs) debug_print('Version of firmware: ', self.fwversion, 'Has kepubs:', self.has_kepubs)
self.booklist_class.rebuild_collections = self.rebuild_collections self.booklist_class.rebuild_collections = self.rebuild_collections
@ -228,18 +239,23 @@ class KOBO(USBMS):
self.dbversion = result[0] self.dbversion = result[0]
debug_print("Database Version: ", self.dbversion) debug_print("Database Version: ", self.dbversion)
opts = self.settings()
if self.dbversion >= 16: if self.dbversion >= 16:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \ 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \
'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' 'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
elif self.dbversion < 16 and self.dbversion >= 14: elif self.dbversion < 16 and self.dbversion >= 14:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \ 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \
'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' 'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
elif self.dbversion < 14 and self.dbversion >= 8: elif self.dbversion < 14 and self.dbversion >= 8:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \ 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \
'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' 'BookID is Null and not ((___ExpirationStatus=3 or ___ExpirationStatus is Null) %(expiry)s') % dict(expiry=' and ContentType = 6)' \
if opts.extra_customization[self.OPT_SHOW_EXPIRED_BOOK_RECORDS] else ')')
else: else:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null' 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null'
@ -339,21 +355,23 @@ class KOBO(USBMS):
# Kobo does not delete the Book row (ie the row where the BookID is Null) # Kobo does not delete the Book row (ie the row where the BookID is Null)
# The next server sync should remove the row # The next server sync should remove the row
cursor.execute('delete from content where BookID = ?', t) cursor.execute('delete from content where BookID = ?', t)
try: if ContentType == 6:
cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0, ___ExpirationStatus=3 ' \
'where BookID is Null and ContentID =?',t)
except Exception as e:
if 'no such column' not in str(e):
raise
try: try:
cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \ cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0, ___ExpirationStatus=3 ' \
'where BookID is Null and ContentID =?',t) 'where BookID is Null and ContentID =?',t)
except Exception as e: except Exception as e:
if 'no such column' not in str(e): if 'no such column' not in str(e):
raise raise
cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\' ' \ try:
'where BookID is Null and ContentID =?',t) cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \
'where BookID is Null and ContentID =?',t)
except Exception as e:
if 'no such column' not in str(e):
raise
cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\' ' \
'where BookID is Null and ContentID =?',t)
else:
cursor.execute('delete from content where BookID is Null and ContentID =?',t)
connection.commit() connection.commit()
@ -370,7 +388,7 @@ class KOBO(USBMS):
path_prefix = '.kobo/images/' path_prefix = '.kobo/images/'
path = self._main_prefix + path_prefix + ImageID path = self._main_prefix + path_prefix + ImageID
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed',) file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed', ' - N3_FULL.parsed',)
for ending in file_endings: for ending in file_endings:
fpath = path + ending fpath = path + ending
@ -750,9 +768,12 @@ class KOBO(USBMS):
blists = {} blists = {}
for i in paths: for i in paths:
if booklists[i] is not None: try:
#debug_print('Booklist: ', i) if booklists[i] is not None:
blists[i] = booklists[i] #debug_print('Booklist: ', i)
blists[i] = booklists[i]
except IndexError:
pass
opts = self.settings() opts = self.settings()
if opts.extra_customization: if opts.extra_customization:
collections = [x.lower().strip() for x in collections = [x.lower().strip() for x in
@ -843,6 +864,7 @@ class KOBO(USBMS):
' - N3_LIBRARY_FULL.parsed':(355,530), ' - N3_LIBRARY_FULL.parsed':(355,530),
' - N3_LIBRARY_GRID.parsed':(149,233), ' - N3_LIBRARY_GRID.parsed':(149,233),
' - N3_LIBRARY_LIST.parsed':(60,90), ' - N3_LIBRARY_LIST.parsed':(60,90),
' - N3_FULL.parsed':(600,800),
' - N3_SOCIAL_CURRENTREAD.parsed':(120,186)} ' - N3_SOCIAL_CURRENTREAD.parsed':(120,186)}
for ending, resize in file_endings.items(): for ending, resize in file_endings.items():
@ -865,3 +887,216 @@ class KOBO(USBMS):
else: else:
debug_print("ImageID could not be retreived from the database") debug_print("ImageID could not be retreived from the database")
def prepare_addable_books(self, paths):
'''
The Kobo supports an encrypted epub refered to as a kepub
Unfortunately Kobo decided to put the files on the device
with no file extension. I just hope that decision causes
them as much grief as it does me :-)
This has to make a temporary copy of the book files with a
epub extension to allow Calibre's normal processing to
deal with the file appropriately
'''
for idx, path in enumerate(paths):
if path.find('kepub') >= 0:
with closing(open(path)) as r:
tf = PersistentTemporaryFile(suffix='.epub')
tf.write(r.read())
paths[idx] = tf.name
return paths
def create_annotations_path(self, mdata, device_path=None):
if device_path:
return device_path
return USBMS.create_annotations_path(self, mdata)
def get_annotations(self, path_map):
EPUB_FORMATS = [u'epub']
epub_formats = set(EPUB_FORMATS)
def get_storage():
storage = []
if self._main_prefix:
storage.append(os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN))
if self._card_a_prefix:
storage.append(os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A))
if self._card_b_prefix:
storage.append(os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B))
return storage
def resolve_bookmark_paths(storage, path_map):
pop_list = []
book_ext = {}
for id in path_map:
file_fmts = set()
for fmt in path_map[id]['fmts']:
file_fmts.add(fmt)
bookmark_extension = None
if file_fmts.intersection(epub_formats):
book_extension = list(file_fmts.intersection(epub_formats))[0]
bookmark_extension = 'epub'
if bookmark_extension:
for vol in storage:
bkmk_path = path_map[id]['path']
bkmk_path = bkmk_path
if os.path.exists(bkmk_path):
path_map[id] = bkmk_path
book_ext[id] = book_extension
break
else:
pop_list.append(id)
else:
pop_list.append(id)
# Remove non-existent bookmark templates
for id in pop_list:
path_map.pop(id)
return path_map, book_ext
storage = get_storage()
path_map, book_ext = resolve_bookmark_paths(storage, path_map)
bookmarked_books = {}
for id in path_map:
extension = os.path.splitext(path_map[id])[1]
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(path_map[id])
ContentID = self.contentid_from_path(path_map[id], ContentType)
bookmark_ext = extension
db_path = self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')
myBookmark = Bookmark(db_path, ContentID, path_map[id], id, book_ext[id], bookmark_ext)
bookmarked_books[id] = self.UserAnnotation(type='kobo_bookmark', value=myBookmark)
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
return bookmarked_books
def generate_annotation_html(self, bookmark):
    '''
    Render a fetched Kobo bookmark as an HTML fragment.

    :param bookmark: a Bookmark instance carrying ``last_read``,
        ``percent_read`` and ``user_notes`` (a dict keyed by location)
        as read from the device's KoboReader.sqlite database.
    :return: a BeautifulSoup document whose root element is
        ``<div class="user_annotations"> ... </div>``, suitable for
        appending to a book's comments field.
    '''
    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
    percent_read = bookmark.percent_read
    debug_print("Date: ", bookmark.last_read)
    if bookmark.last_read is not None:
        # The device stores an ISO timestamp, sometimes with a fractional
        # seconds component; try the plain layout first, then fall back.
        try:
            last_read = time.strftime("%Y-%m-%d %H:%M:%S",
                    time.localtime(calendar.timegm(time.strptime(
                        bookmark.last_read, "%Y-%m-%dT%H:%M:%S"))))
        except ValueError:
            last_read = time.strftime("%Y-%m-%d %H:%M:%S",
                    time.localtime(calendar.timegm(time.strptime(
                        bookmark.last_read, "%Y-%m-%dT%H:%M:%S.%f"))))
    else:
        # No timestamp recorded on the device; use the current time.
        last_read = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())

    ka_soup = BeautifulSoup()
    dtc = 0
    divTag = Tag(ka_soup, 'div')
    divTag['class'] = 'user_annotations'

    # Add the last-read/percentage header. The original epub and non-epub
    # branches here were byte-identical, so the format check was redundant
    # and has been removed.
    spanTag = Tag(ka_soup, 'span')
    spanTag['style'] = 'font-weight:normal'
    spanTag.insert(0, NavigableString(
        _("<hr /><b>Book Last Read:</b> %(time)s<br /><b>Percentage Read:</b> %(pr)d%%<hr />") %
        dict(time=last_read,
             pr=percent_read)))
    divTag.insert(dtc, spanTag)
    dtc += 1
    divTag.insert(dtc, Tag(ka_soup, 'br'))
    dtc += 1

    if bookmark.user_notes:
        user_notes = bookmark.user_notes
        annotations = []
        # Add the annotations sorted by location
        for location in sorted(user_notes):
            note = user_notes[location]
            if note['type'] == 'Bookmark':
                annotations.append(
                    _('<b>Chapter %(chapter)d:</b> %(chapter_title)s<br /><b>%(typ)s</b><br /><b>Chapter Progress:</b> %(chapter_progress)s%%<br />%(annotation)s<br /><hr />') %
                    dict(chapter=note['chapter'],
                         dl=note['displayed_location'],
                         typ=note['type'],
                         chapter_title=note['chapter_title'],
                         chapter_progress=note['chapter_progress'],
                         annotation=note['annotation'] if note['annotation'] is not None else ""))
            elif note['type'] == 'Highlight':
                annotations.append(
                    _('<b>Chapter %(chapter)d:</b> %(chapter_title)s<br /><b>%(typ)s</b><br /><b>Chapter Progress:</b> %(chapter_progress)s%%<br /><b>Highlight:</b> %(text)s<br /><hr />') %
                    dict(chapter=note['chapter'],
                         dl=note['displayed_location'],
                         typ=note['type'],
                         chapter_title=note['chapter_title'],
                         chapter_progress=note['chapter_progress'],
                         text=note['text']))
            else:
                # 'Annotation' entries and any unrecognized types render
                # identically (the two original branches were duplicates).
                annotations.append(
                    _('<b>Chapter %(chapter)d:</b> %(chapter_title)s<br /><b>%(typ)s</b><br /><b>Chapter Progress:</b> %(chapter_progress)s%%<br /><b>Highlight:</b> %(text)s<br /><b>Notes:</b> %(annotation)s<br /><hr />') %
                    dict(chapter=note['chapter'],
                         dl=note['displayed_location'],
                         typ=note['type'],
                         chapter_title=note['chapter_title'],
                         chapter_progress=note['chapter_progress'],
                         text=note['text'],
                         annotation=note['annotation']))
        for annotation in annotations:
            divTag.insert(dtc, annotation)
            dtc += 1

    ka_soup.insert(0, divTag)
    return ka_soup
def add_annotation_to_library(self, db, db_id, annotation):
    '''
    Merge a fetched Kobo bookmark into the comments of the matching
    library book, and attach the bookmark file itself as a format.

    :param db: the calibre library database.
    :param db_id: id of the target book in the library.
    :param annotation: a UserAnnotation namedtuple; only entries of
        type 'kobo_bookmark' are processed.
    '''
    from calibre.ebooks.BeautifulSoup import Tag
    mark = annotation
    skip_tags = set(['Catalog', 'Clippings'])
    if mark.type != 'kobo_bookmark':
        return
    mi = db.get_metadata(db_id, index_is_id=True)
    notes_soup = self.generate_annotation_html(mark.value)
    if mi.comments:
        # Strip any annotation block left by a previous fetch. Both
        # offsets are located in the untruncated comments before either
        # cut is applied.
        ann_start = mi.comments.find('<div class="user_annotations">')
        divider_start = mi.comments.find('<hr class="annotations_divider" />')
        if ann_start >= 0:
            mi.comments = mi.comments[:ann_start]
        if divider_start >= 0:
            mi.comments = mi.comments[:divider_start]
    if set(mi.tags).intersection(skip_tags):
        return
    if mi.comments:
        # Existing comments: separate them from the annotations with a
        # divider so a later fetch can find and replace the block.
        divider = Tag(notes_soup, 'hr')
        divider['class'] = 'annotations_divider'
        notes_soup.insert(0, divider)
        mi.comments = mi.comments + unicode(notes_soup.prettify())
    else:
        mi.comments = unicode(notes_soup.prettify())
    # Persist the merged comments to the library
    db.set_comment(db_id, mi.comments)
    # Store the raw bookmark file alongside the book's other formats
    db.add_format_with_hooks(db_id, mark.value.bookmark_extension,
            mark.value.path, index_is_id=True)

View File

@ -1,226 +1,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#define MARKER ".created_by_calibre_mount_helper"
#define False 0
#define True 1
int exists(const char *path) {
struct stat file_info;
if (stat(path, &file_info) == 0) return True;
return False;
}
int get_root() {
int res;
res = setreuid(0, 0);
if (res != 0) return False;
if (setregid(0, 0) != 0) return False;
return True;
}
void ensure_root() {
if (!get_root()) {
fprintf(stderr, "Failed to get root.\n");
exit(EXIT_FAILURE);
}
}
int do_mount(const char *dev, const char *mp) {
char options[1000], marker[2000];
#ifdef __NetBSD__
char uids[100], gids[100];
#endif
int errsv;
if (!exists(dev)) {
fprintf(stderr, "Specified device node does not exist\n");
return EXIT_FAILURE;
}
if (!exists(mp)) {
if (mkdir(mp, S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) != 0) {
errsv = errno;
fprintf(stderr, "Failed to create mount point with error: %s\n", strerror(errsv));
}
}
snprintf(marker, 2000, "%s/%s", mp, MARKER);
if (!exists(marker)) {
int fd = creat(marker, S_IRUSR|S_IWUSR);
if (fd == -1) {
int errsv = errno;
fprintf(stderr, "Failed to create marker with error: %s\n", strerror(errsv));
return EXIT_FAILURE;
}
close(fd);
}
#ifdef __NetBSD__
snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev");
snprintf(uids, 100, "%d", getuid());
snprintf(gids, 100, "%d", getgid());
#else
#ifdef __FreeBSD__
snprintf(options, 1000, "rw,noexec,nosuid,sync,-u=%d,-g=%d",getuid(),getgid());
#else
snprintf(options, 1000, "rw,noexec,nosuid,sync,nodev,quiet,shortname=mixed,uid=%d,gid=%d,umask=077,fmask=0177,dmask=0077,utf8,iocharset=iso8859-1", getuid(), getgid());
#endif
#endif
ensure_root();
#ifdef __NetBSD__
execlp("mount_msdos", "mount_msdos", "-u", uids, "-g", gids, "-o", options, dev, mp, NULL);
#else
#ifdef __FreeBSD__
execlp("mount", "mount", "-t", "msdosfs", "-o", options, dev, mp, NULL);
#else
execlp("mount", "mount", "-t", "auto", "-o", options, dev, mp, NULL);
#endif
#endif
errsv = errno;
fprintf(stderr, "Failed to mount with error: %s\n", strerror(errsv));
return EXIT_FAILURE;
}
int call_eject(const char *dev, const char *mp) {
int ret, pid, errsv, i, status = EXIT_FAILURE;
pid = fork();
if (pid == -1) {
fprintf(stderr, "Failed to fork\n");
exit(EXIT_FAILURE);
}
if (pid == 0) { /* Child process */
ensure_root();
#ifdef __NetBSD__
execlp("eject", "eject", dev, NULL);
#else
#ifdef __FreeBSD__
execlp("umount", "umount", dev, NULL);
#else
execlp("eject", "eject", "-s", dev, NULL);
#endif
#endif
/* execlp failed */
errsv = errno;
fprintf(stderr, "Failed to eject with error: %s\n", strerror(errsv));
exit(EXIT_FAILURE);
} else { /* Parent */
for (i = 0; i < 7; i++) {
sleep(1);
ret = waitpid(pid, &status, WNOHANG);
if (ret == -1) return False;
if (ret > 0) break;
}
return WIFEXITED(status) && WEXITSTATUS(status) == 0;
}
return False;
}
int call_umount(const char *dev, const char *mp) {
int ret, pid, errsv, i, status = EXIT_FAILURE;
pid = fork();
if (pid == -1) {
fprintf(stderr, "Failed to fork\n");
exit(EXIT_FAILURE);
}
if (pid == 0) { /* Child process */
ensure_root();
#ifdef __FreeBSD__
execlp("umount", "umount", mp, NULL);
#else
execlp("umount", "umount", "-l", mp, NULL);
#endif
/* execlp failed */
errsv = errno;
fprintf(stderr, "Failed to umount with error: %s\n", strerror(errsv));
exit(EXIT_FAILURE);
} else { /* Parent */
for (i = 0; i < 7; i++) {
sleep(1);
ret = waitpid(pid, &status, WNOHANG);
if (ret == -1) return False;
if (ret > 0) break;
}
return WIFEXITED(status) && WEXITSTATUS(status) == 0;
}
return False;
}
int cleanup_mount_point(const char *mp) {
char marker[2000];
int urt, rmd, errsv;
snprintf(marker, 2000, "%s/%s", mp, MARKER);
if (exists(marker)) {
urt = unlink(marker);
if (urt == -1) {
errsv = errno;
fprintf(stderr, "Failed to unlink marker: %s\n", strerror(errsv));
return EXIT_FAILURE;
}
}
rmd = rmdir(mp);
if (rmd == -1) {
errsv = errno;
fprintf(stderr, "Failed to remove mount point: %s\n", strerror(errsv));
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int do_eject(const char *dev, const char *mp) {
int unmounted = False;
ensure_root();
unmounted = call_eject(dev, mp);
if (!unmounted) call_umount(dev, mp);
if (unmounted) return cleanup_mount_point(mp);
return EXIT_FAILURE;
}
int cleanup(const char *dev, const char *mp) {
ensure_root();
call_umount(dev, mp);
return cleanup_mount_point(mp);
}
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
char *action, *dev, *mp; return EXIT_FAILURE;
int status = EXIT_FAILURE;
/*printf("Real UID\t= %d\n", getuid());
printf("Effective UID\t= %d\n", geteuid());
printf("Real GID\t= %d\n", getgid());
printf("Effective GID\t= %d\n", getegid());*/
if (argc != 4) {
fprintf(stderr, "Needs 3 arguments: action, device node and mount point\n");
exit(EXIT_FAILURE);
}
action = argv[1]; dev = argv[2]; mp = argv[3];
if (strncmp(action, "mount", 5) == 0) {
status = do_mount(dev, mp);
} else if (strncmp(action, "eject", 5) == 0) {
status = do_eject(dev, mp);
} else if (strncmp(action, "cleanup", 7) == 0) {
status = cleanup(dev, mp);
} else {
fprintf(stderr, "Unrecognized action: must be mount, eject or cleanup\n");
}
return status;
} }

View File

@ -224,7 +224,7 @@ class TREKSTOR(USBMS):
FORMATS = ['epub', 'txt', 'pdf'] FORMATS = ['epub', 'txt', 'pdf']
VENDOR_ID = [0x1e68] VENDOR_ID = [0x1e68]
PRODUCT_ID = [0x0041, 0x0042, PRODUCT_ID = [0x0041, 0x0042, 0x0052,
0x003e # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091 0x003e # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091
] ]
BCD = [0x0002] BCD = [0x0002]

View File

@ -20,9 +20,8 @@ from calibre.devices.usbms.driver import USBMS, debug_print
from calibre.devices.usbms.device import USBDevice from calibre.devices.usbms.device import USBDevice
from calibre.devices.usbms.books import CollectionsBookList from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import BookList from calibre.devices.usbms.books import BookList
from calibre.ebooks.metadata import authors_to_sort_string from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
from calibre.constants import islinux from calibre.constants import islinux
from calibre.ebooks.metadata import authors_to_string, authors_to_sort_string
DBPATH = 'Sony_Reader/database/books.db' DBPATH = 'Sony_Reader/database/books.db'
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg' THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'
@ -40,7 +39,8 @@ class PRST1(USBMS):
path_sep = '/' path_sep = '/'
booklist_class = CollectionsBookList booklist_class = CollectionsBookList
FORMATS = ['epub', 'pdf', 'txt'] FORMATS = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are
# used in japan
CAN_SET_METADATA = ['collections'] CAN_SET_METADATA = ['collections']
CAN_DO_DEVICE_DB_PLUGBOARD = True CAN_DO_DEVICE_DB_PLUGBOARD = True
@ -112,8 +112,10 @@ class PRST1(USBMS):
def post_open_callback(self): def post_open_callback(self):
# Set the thumbnail width to the theoretical max if the user has asked # Set the thumbnail width to the theoretical max if the user has asked
# that we do not preserve aspect ratio # that we do not preserve aspect ratio
if not self.settings().extra_customization[self.OPT_PRESERVE_ASPECT_RATIO]: ec = self.settings().extra_customization
if not ec[self.OPT_PRESERVE_ASPECT_RATIO]:
self.THUMBNAIL_WIDTH = 108 self.THUMBNAIL_WIDTH = 108
self.WANTS_UPDATED_THUMBNAILS = ec[self.OPT_REFRESH_COVERS]
# Make sure the date offset is set to none, we'll calculate it in books. # Make sure the date offset is set to none, we'll calculate it in books.
self.device_offset = None self.device_offset = None
@ -186,7 +188,7 @@ class PRST1(USBMS):
if self.device_offset is None: if self.device_offset is None:
query = 'SELECT file_path, modified_date FROM books' query = 'SELECT file_path, modified_date FROM books'
cursor.execute(query) cursor.execute(query)
time_offsets = {} time_offsets = {}
for i, row in enumerate(cursor): for i, row in enumerate(cursor):
comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000); comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
@ -194,7 +196,7 @@ class PRST1(USBMS):
offset = device_date - comp_date offset = device_date - comp_date
time_offsets.setdefault(offset, 0) time_offsets.setdefault(offset, 0)
time_offsets[offset] = time_offsets[offset] + 1 time_offsets[offset] = time_offsets[offset] + 1
try: try:
device_offset = max(time_offsets,key = lambda a: time_offsets.get(a)) device_offset = max(time_offsets,key = lambda a: time_offsets.get(a))
debug_print("Device Offset: %d ms"%device_offset) debug_print("Device Offset: %d ms"%device_offset)
@ -304,7 +306,7 @@ class PRST1(USBMS):
if use_sony_authors: if use_sony_authors:
author = newmi.authors[0] author = newmi.authors[0]
else: else:
author = authors_to_string(newmi.authors) author = authors_to_string(newmi.authors)
except: except:
author = _('Unknown') author = _('Unknown')
title = newmi.title or _('Unknown') title = newmi.title or _('Unknown')
@ -348,7 +350,7 @@ class PRST1(USBMS):
if self.is_sony_periodical(book): if self.is_sony_periodical(book):
self.periodicalize_book(connection, book) self.periodicalize_book(connection, book)
for book, bookId in db_books.items(): for book, bookId in db_books.items():
if bookId is not None: if bookId is not None:
# Remove From Collections # Remove From Collections
@ -531,7 +533,7 @@ class PRST1(USBMS):
if book.pubdate.date() < date(2010, 10, 17): if book.pubdate.date() < date(2010, 10, 17):
return False return False
return True return True
def periodicalize_book(self, connection, book): def periodicalize_book(self, connection, book):
if not self.is_sony_periodical(book): if not self.is_sony_periodical(book):
return return
@ -555,19 +557,26 @@ class PRST1(USBMS):
pubdate = int(time.mktime(book.pubdate.timetuple()) * 1000) pubdate = int(time.mktime(book.pubdate.timetuple()) * 1000)
except: except:
pass pass
cursor = connection.cursor() cursor = connection.cursor()
periodical_schema = \
"'http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0'"
# Setting this to the SONY periodical schema apparently causes errors
# with some periodicals, therefore set it to null, since the special
# periodical navigation doesn't work anyway.
periodical_schema = 'null'
query = ''' query = '''
UPDATE books UPDATE books
SET conforms_to = 'http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0', SET conforms_to = %s,
periodical_name = ?, periodical_name = ?,
description = ?, description = ?,
publication_date = ? publication_date = ?
WHERE _id = ? WHERE _id = ?
''' '''%periodical_schema
t = (name, None, pubdate, book.bookId,) t = (name, None, pubdate, book.bookId,)
cursor.execute(query, t) cursor.execute(query, t)
connection.commit() connection.commit()
cursor.close() cursor.close()

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import dbus, os import os, dbus
def node_mountpoint(node): def node_mountpoint(node):
@ -23,8 +23,6 @@ def node_mountpoint(node):
class UDisks(object): class UDisks(object):
def __init__(self): def __init__(self):
if os.environ.get('CALIBRE_DISABLE_UDISKS', False):
raise Exception('User has aborted use of UDISKS')
self.bus = dbus.SystemBus() self.bus = dbus.SystemBus()
self.main = dbus.Interface(self.bus.get_object('org.freedesktop.UDisks', self.main = dbus.Interface(self.bus.get_object('org.freedesktop.UDisks',
'/org/freedesktop/UDisks'), 'org.freedesktop.UDisks') '/org/freedesktop/UDisks'), 'org.freedesktop.UDisks')
@ -39,7 +37,7 @@ class UDisks(object):
try: try:
return unicode(d.FilesystemMount('', return unicode(d.FilesystemMount('',
['auth_no_user_interaction', 'rw', 'noexec', 'nosuid', ['auth_no_user_interaction', 'rw', 'noexec', 'nosuid',
'sync', 'nodev', 'uid=1000', 'gid=1000'])) 'sync', 'nodev', 'uid=%d'%os.geteuid(), 'gid=%d'%os.getegid()]))
except: except:
# May be already mounted, check # May be already mounted, check
mp = node_mountpoint(str(device_node_path)) mp = node_mountpoint(str(device_node_path))

View File

@ -616,20 +616,11 @@ class Device(DeviceConfig, DevicePlugin):
mount(node) mount(node)
return 0 return 0
except: except:
pass print 'Udisks mount call failed:'
import traceback
traceback.print_exc()
return 1
cmd = 'calibre-mount-helper'
if getattr(sys, 'frozen', False):
cmd = os.path.join(sys.executables_location, 'bin', cmd)
cmd = [cmd, 'mount']
try:
p = subprocess.Popen(cmd + [node, '/media/'+label])
except OSError:
raise DeviceError(
_('Could not find mount helper: %s.')%cmd[0])
while p.poll() is None:
time.sleep(0.1)
return p.returncode
ret = do_mount(node, label) ret = do_mount(node, label)
if ret != 0: if ret != 0:
@ -777,9 +768,12 @@ class Device(DeviceConfig, DevicePlugin):
# try all the nodes to see what we can mount # try all the nodes to see what we can mount
for dev in devs[i].split(): for dev in devs[i].split():
mp='/media/'+label+'-'+dev mp='/media/'+label+'-'+dev
mmp = mp
if mmp.endswith('/'):
mmp = mmp[:-1]
#print "trying ", dev, "on", mp #print "trying ", dev, "on", mp
try: try:
p = subprocess.Popen(cmd + ["/dev/"+dev, mp]) p = subprocess.Popen(cmd + ["/dev/"+dev, mmp])
except OSError: except OSError:
raise DeviceError(_('Could not find mount helper: %s.')%cmd[0]) raise DeviceError(_('Could not find mount helper: %s.')%cmd[0])
while p.poll() is None: while p.poll() is None:
@ -928,29 +922,12 @@ class Device(DeviceConfig, DevicePlugin):
umount(d) umount(d)
except: except:
pass pass
failures = False
for d in drives: for d in drives:
try: try:
eject(d) eject(d)
except Exception as e: except Exception as e:
print 'Udisks eject call for:', d, 'failed:' print 'Udisks eject call for:', d, 'failed:'
print '\t', e print '\t', e
failures = True
if not failures:
return
for drive in drives:
cmd = 'calibre-mount-helper'
if getattr(sys, 'frozen', False):
cmd = os.path.join(sys.executables_location, 'bin', cmd)
cmd = [cmd, 'eject']
mp = getattr(self, "_linux_mount_map", {}).get(drive,
'dummy/')[:-1]
try:
subprocess.Popen(cmd + [drive, mp]).wait()
except:
pass
def eject(self): def eject(self):
if islinux: if islinux:
@ -976,19 +953,6 @@ class Device(DeviceConfig, DevicePlugin):
self._main_prefix = self._card_a_prefix = self._card_b_prefix = None self._main_prefix = self._card_a_prefix = self._card_b_prefix = None
def linux_post_yank(self): def linux_post_yank(self):
for drive, mp in getattr(self, '_linux_mount_map', {}).items():
if drive and mp:
mp = mp[:-1]
cmd = 'calibre-mount-helper'
if getattr(sys, 'frozen', False):
cmd = os.path.join(sys.executables_location, 'bin', cmd)
cmd = [cmd, 'cleanup']
if mp and os.path.exists(mp):
try:
subprocess.Popen(cmd + [drive, mp]).wait()
except:
import traceback
traceback.print_exc()
self._linux_mount_map = {} self._linux_mount_map = {}
def post_yank_cleanup(self): def post_yank_cleanup(self):
@ -1068,6 +1032,12 @@ class Device(DeviceConfig, DevicePlugin):
''' '''
return {} return {}
def add_annotation_to_library(self, db, db_id, annotation):
'''
Add an annotation to the calibre library
'''
pass
def create_upload_path(self, path, mdata, fname, create_dirs=True): def create_upload_path(self, path, mdata, fname, create_dirs=True):
path = os.path.abspath(path) path = os.path.abspath(path)
maxlen = self.MAX_PATH_LEN maxlen = self.MAX_PATH_LEN
@ -1147,3 +1117,6 @@ class Device(DeviceConfig, DevicePlugin):
os.makedirs(filedir) os.makedirs(filedir)
return filepath return filepath
def create_annotations_path(self, mdata, device_path=None):
return self.create_upload_path(os.path.abspath('/<storage>'), mdata, 'x.bookmark', create_dirs=False)

View File

@ -30,7 +30,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4'] 'xps', 'oxps', 'azw4', 'book', 'zbf']
class HTMLRenderer(object): class HTMLRenderer(object):

View File

@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False): def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
from calibre.ebooks.chm.reader import CHMReader from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file') log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log, self.opts) rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
log.debug('Extracting CHM to %s' % output_dir) log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir, debug_dump=debug_dump) rdr.extract_content(output_dir, debug_dump=debug_dump)
self._chm_reader = rdr self._chm_reader = rdr

View File

@ -40,14 +40,14 @@ class CHMError(Exception):
pass pass
class CHMReader(CHMFile): class CHMReader(CHMFile):
def __init__(self, input, log, opts): def __init__(self, input, log, input_encoding=None):
CHMFile.__init__(self) CHMFile.__init__(self)
if isinstance(input, unicode): if isinstance(input, unicode):
input = input.encode(filesystem_encoding) input = input.encode(filesystem_encoding)
if not self.LoadCHM(input): if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,)) raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log self.log = log
self.opts = opts self.input_encoding = input_encoding
self._sourcechm = input self._sourcechm = input
self._contents = None self._contents = None
self._playorder = 0 self._playorder = 0
@ -156,8 +156,8 @@ class CHMReader(CHMFile):
break break
def _reformat(self, data, htmlpath): def _reformat(self, data, htmlpath):
if self.opts.input_encoding: if self.input_encoding:
data = data.decode(self.opts.input_encoding) data = data.decode(self.input_encoding)
try: try:
data = xml_to_unicode(data, strip_encoding_pats=True)[0] data = xml_to_unicode(data, strip_encoding_pats=True)[0]
soup = BeautifulSoup(data) soup = BeautifulSoup(data)

View File

@ -127,7 +127,7 @@ class FB2Input(InputFormatPlugin):
def extract_embedded_content(self, doc): def extract_embedded_content(self, doc):
self.binary_map = {} self.binary_map = {}
for elem in doc.xpath('./*'): for elem in doc.xpath('./*'):
if 'binary' in elem.tag and elem.attrib.has_key('id'): if elem.text and 'binary' in elem.tag and elem.attrib.has_key('id'):
ct = elem.get('content-type', '') ct = elem.get('content-type', '')
fname = elem.attrib['id'] fname = elem.attrib['id']
ext = ct.rpartition('/')[-1].lower() ext = ct.rpartition('/')[-1].lower()

View File

@ -109,14 +109,16 @@ class HTMLFile(object):
try: try:
with open(self.path, 'rb') as f: with open(self.path, 'rb') as f:
src = f.read() src = f.read(4096)
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src))
if not self.is_binary:
src += f.read()
except IOError as err: except IOError as err:
msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err)) msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err))
if level == 0: if level == 0:
raise IOError(msg) raise IOError(msg)
raise IgnoreFile(msg, err.errno) raise IgnoreFile(msg, err.errno)
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
if not self.is_binary: if not self.is_binary:
if not encoding: if not encoding:
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1] encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]

View File

@ -116,10 +116,14 @@ def title_sort(title, order=None):
title = title[1:] title = title[1:]
match = _title_pat.search(title) match = _title_pat.search(title)
if match: if match:
prep = match.group(1) try:
title = title[len(prep):] + ', ' + prep prep = match.group(1)
if title[0] in _ignore_starts: except IndexError:
title = title[1:] pass
else:
title = title[len(prep):] + ', ' + prep
if title[0] in _ignore_starts:
title = title[1:]
return title.strip() return title.strip()
coding = zip( coding = zip(

View File

@ -30,9 +30,11 @@ class Worker(Thread): # Get details {{{
Get book details from amazons book page in a separate thread Get book details from amazons book page in a separate thread
''' '''
def __init__(self, url, result_queue, browser, log, relevance, domain, plugin, timeout=20): def __init__(self, url, result_queue, browser, log, relevance, domain,
plugin, timeout=20, testing=False):
Thread.__init__(self) Thread.__init__(self)
self.daemon = True self.daemon = True
self.testing = testing
self.url, self.result_queue = url, result_queue self.url, self.result_queue = url, result_queue
self.log, self.timeout = log, timeout self.log, self.timeout = log, timeout
self.relevance, self.plugin = relevance, plugin self.relevance, self.plugin = relevance, plugin
@ -189,10 +191,9 @@ class Worker(Thread): # Get details {{{
self.log.exception(msg) self.log.exception(msg)
return return
oraw = raw
raw = xml_to_unicode(raw, strip_encoding_pats=True, raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0] resolve_entities=True)[0]
#open('/t/t.html', 'wb').write(raw)
if '<title>404 - ' in raw: if '<title>404 - ' in raw:
self.log.error('URL malformed: %r'%self.url) self.log.error('URL malformed: %r'%self.url)
return return
@ -211,14 +212,20 @@ class Worker(Thread): # Get details {{{
self.log.error(msg) self.log.error(msg)
return return
self.parse_details(root) self.parse_details(oraw, root)
def parse_details(self, root): def parse_details(self, raw, root):
try: try:
asin = self.parse_asin(root) asin = self.parse_asin(root)
except: except:
self.log.exception('Error parsing asin for url: %r'%self.url) self.log.exception('Error parsing asin for url: %r'%self.url)
asin = None asin = None
if self.testing:
import tempfile
with tempfile.NamedTemporaryFile(prefix=asin + '_',
suffix='.html', delete=False) as f:
f.write(raw)
print ('Downloaded html for', asin, 'saved in', f.name)
try: try:
title = self.parse_title(root) title = self.parse_title(root)
@ -310,7 +317,7 @@ class Worker(Thread): # Get details {{{
return l.get('href').rpartition('/')[-1] return l.get('href').rpartition('/')[-1]
def parse_title(self, root): def parse_title(self, root):
tdiv = root.xpath('//h1[@class="parseasinTitle"]')[0] tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]') actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
if actual_title: if actual_title:
title = tostring(actual_title[0], encoding=unicode, title = tostring(actual_title[0], encoding=unicode,
@ -320,11 +327,11 @@ class Worker(Thread): # Get details {{{
return re.sub(r'[(\[].*[)\]]', '', title).strip() return re.sub(r'[(\[].*[)\]]', '', title).strip()
def parse_authors(self, root): def parse_authors(self, root):
x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]' x = '//h1[contains(@class, "parseasinTitle")]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
aname = root.xpath(x) aname = root.xpath(x)
if not aname: if not aname:
aname = root.xpath(''' aname = root.xpath('''
//h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")] //h1[contains(@class, "parseasinTitle")]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
''') ''')
for x in aname: for x in aname:
x.tail = '' x.tail = ''
@ -666,7 +673,8 @@ class Amazon(Source):
log.error('No matches found with query: %r'%query) log.error('No matches found with query: %r'%query)
return return
workers = [Worker(url, result_queue, br, log, i, domain, self) for i, url in workers = [Worker(url, result_queue, br, log, i, domain, self,
testing=getattr(self, 'running_a_test', False)) for i, url in
enumerate(matches)] enumerate(matches)]
for w in workers: for w in workers:
@ -740,16 +748,6 @@ if __name__ == '__main__': # tests {{{
), ),
( # An e-book ISBN not on Amazon, the title/author search matches
# the Kindle edition, which has different markup for ratings and
# isbn
{'identifiers':{'isbn': '9780307459671'},
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
),
( # This isbn not on amazon ( # This isbn not on amazon
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python', {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
'authors':['Lutz']}, 'authors':['Lutz']},
@ -783,7 +781,7 @@ if __name__ == '__main__': # tests {{{
de_tests = [ # {{{ de_tests = [ # {{{
( (
{'identifiers':{'isbn': '3548283519'}}, {'identifiers':{'isbn': '3548283519'}},
[title_test('Wer Wind sät', [title_test('Wer Wind Sät: Der Fünfte Fall Für Bodenstein Und Kirchhoff',
exact=True), authors_test(['Nele Neuhaus']) exact=True), authors_test(['Nele Neuhaus'])
] ]
@ -835,6 +833,6 @@ if __name__ == '__main__': # tests {{{
] # }}} ] # }}}
test_identify_plugin(Amazon.name, com_tests) test_identify_plugin(Amazon.name, com_tests)
#test_identify_plugin(Amazon.name, es_tests) #test_identify_plugin(Amazon.name, de_tests)
# }}} # }}}

View File

@ -196,6 +196,7 @@ class Source(Plugin):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
Plugin.__init__(self, *args, **kwargs) Plugin.__init__(self, *args, **kwargs)
self.running_a_test = False # Set to True when using identify_test()
self._isbn_to_identifier_cache = {} self._isbn_to_identifier_cache = {}
self._identifier_to_cover_url_cache = {} self._identifier_to_cover_url_cache = {}
self.cache_lock = threading.RLock() self.cache_lock = threading.RLock()
@ -284,14 +285,15 @@ class Source(Plugin):
if authors: if authors:
# Leave ' in there for Irish names # Leave ' in there for Irish names
remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]') remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
replace_pat = re.compile(r'[-+.:;]') replace_pat = re.compile(r'[-+.:;,]')
if only_first_author: if only_first_author:
authors = authors[:1] authors = authors[:1]
for au in authors: for au in authors:
has_comma = ',' in au
au = replace_pat.sub(' ', au) au = replace_pat.sub(' ', au)
parts = au.split() parts = au.split()
if ',' in au: if has_comma:
# au probably in ln, fn form # au probably in ln, fn form
parts = parts[1:] + parts[:1] parts = parts[1:] + parts[:1]
for tok in parts: for tok in parts:

View File

@ -183,7 +183,11 @@ def test_identify_plugin(name, tests): # {{{
rq = Queue() rq = Queue()
args = (log, rq, abort) args = (log, rq, abort)
start_time = time.time() start_time = time.time()
err = plugin.identify(*args, **kwargs) plugin.running_a_test = True
try:
err = plugin.identify(*args, **kwargs)
finally:
plugin.running_a_test = False
total_time = time.time() - start_time total_time = time.time() - start_time
times.append(total_time) times.append(total_time)
if err is not None: if err is not None:

View File

@ -138,6 +138,7 @@ class MobiMLizer(object):
self.mobimlize_elem(body, stylizer, BlockState(nbody), self.mobimlize_elem(body, stylizer, BlockState(nbody),
[FormatState()]) [FormatState()])
item.data = nroot item.data = nroot
#print etree.tostring(nroot)
def mobimlize_font(self, ptsize): def mobimlize_font(self, ptsize):
return self.fnums[self.fmap[ptsize]] return self.fnums[self.fmap[ptsize]]
@ -233,9 +234,19 @@ class MobiMLizer(object):
elif tag in TABLE_TAGS: elif tag in TABLE_TAGS:
para.attrib['valign'] = 'top' para.attrib['valign'] = 'top'
if istate.ids: if istate.ids:
last = bstate.body[-1] for id_ in istate.ids:
for id in istate.ids: anchor = etree.Element(XHTML('a'), attrib={'id': id_})
last.addprevious(etree.Element(XHTML('a'), attrib={'id': id})) if tag == 'li':
try:
last = bstate.body[-1][-1]
except:
break
last.insert(0, anchor)
anchor.tail = last.text
last.text = None
else:
last = bstate.body[-1]
last.addprevious(anchor)
istate.ids.clear() istate.ids.clear()
if not text: if not text:
return return

View File

@ -66,12 +66,15 @@ class EXTHHeader(object):
# last update time # last update time
pass pass
elif id == 503: # Long title elif id == 503: # Long title
if not title or title == _('Unknown') or \ # Amazon seems to regard this as the definitive book title
'USER_CONTENT' in title or title.startswith('dtp_'): # rather than the title from the PDB header. In fact when
try: # sending MOBI files through Amazon's email service if the
title = content.decode(codec) # title contains non ASCII chars or non filename safe chars
except: # they are messed up in the PDB header
pass try:
title = content.decode(codec)
except:
pass
#else: #else:
# print 'unknown record', id, repr(content) # print 'unknown record', id, repr(content)
if title: if title:

View File

@ -601,7 +601,7 @@ class MobiWriter(object):
Write the PalmDB header Write the PalmDB header
''' '''
title = ascii_filename(unicode(self.oeb.metadata.title[0])).replace( title = ascii_filename(unicode(self.oeb.metadata.title[0])).replace(
' ', '_')[:32] ' ', '_')[:31]
title = title + (b'\0' * (32 - len(title))) title = title + (b'\0' * (32 - len(title)))
now = int(time.time()) now = int(time.time())
nrecords = len(self.records) nrecords = len(self.records)

View File

@ -74,7 +74,10 @@ class Extract(ODF2XHTML):
style = style[0] style = style[0]
css = style.text css = style.text
if css: if css:
style.text, sel_map = self.do_filter_css(css) css, sel_map = self.do_filter_css(css)
if not isinstance(css, unicode):
css = css.decode('utf-8', 'ignore')
style.text = css
for x in root.xpath('//*[@class]'): for x in root.xpath('//*[@class]'):
extra = [] extra = []
orig = x.get('class') orig = x.get('class')

View File

@ -20,6 +20,7 @@ from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log from calibre.utils.logging import Log
from calibre import guess_type, prints, prepare_string_for_xml from calibre import guess_type, prints, prepare_string_for_xml
from calibre.ebooks.oeb.transforms.cover import CoverManager from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.constants import filesystem_encoding
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\ TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800' '__ar__', 'none').replace('__viewbox__', '0 0 600 800'
@ -180,6 +181,8 @@ class EbookIterator(object):
self.delete_on_exit = [] self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter') self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__() self.base = self._tdir.__enter__()
if not isinstance(self.base, unicode):
self.base = self.base.decode(filesystem_encoding)
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(self.pathtoebook, self.base, self.log) plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options() plumber.setup_options()

View File

@ -15,7 +15,6 @@ from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation OptionRecommendation
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.pdf.writer import PDFWriter, ImagePDFWriter, PDFMetadata
from calibre.ebooks.pdf.pageoptions import UNITS, PAPER_SIZES, \ from calibre.ebooks.pdf.pageoptions import UNITS, PAPER_SIZES, \
ORIENTATIONS ORIENTATIONS
@ -90,6 +89,7 @@ class PDFOutput(OutputFormatPlugin):
self.convert_text(oeb_book) self.convert_text(oeb_book)
def convert_images(self, images): def convert_images(self, images):
from calibre.ebooks.pdf.writer import ImagePDFWriter
self.write(ImagePDFWriter, images) self.write(ImagePDFWriter, images)
def get_cover_data(self): def get_cover_data(self):
@ -105,6 +105,7 @@ class PDFOutput(OutputFormatPlugin):
self.cover_data = None self.cover_data = None
def convert_text(self, oeb_book): def convert_text(self, oeb_book):
from calibre.ebooks.pdf.writer import PDFWriter
self.log.debug('Serializing oeb input to disk for processing...') self.log.debug('Serializing oeb input to disk for processing...')
self.get_cover_data() self.get_cover_data()
@ -119,6 +120,7 @@ class PDFOutput(OutputFormatPlugin):
self.write(PDFWriter, [s.path for s in opf.spine]) self.write(PDFWriter, [s.path for s in opf.spine])
def write(self, Writer, items): def write(self, Writer, items):
from calibre.ebooks.pdf.writer import PDFMetadata
writer = Writer(self.opts, self.log, cover_data=self.cover_data) writer = Writer(self.opts, self.log, cover_data=self.cover_data)
close = False close = False

View File

@ -104,8 +104,9 @@ class RBWriter(object):
size = len(text) size = len(text)
pages = [] pages = []
for i in range(0, (len(text) / TEXT_RECORD_SIZE) + 1): for i in range(0, (len(text) + TEXT_RECORD_SIZE-1) / TEXT_RECORD_SIZE):
pages.append(zlib.compress(text[i * TEXT_RECORD_SIZE : (i * TEXT_RECORD_SIZE) + TEXT_RECORD_SIZE], 9)) zobj = zlib.compressobj(9, zlib.DEFLATED, 13, 8, 0)
pages.append(zobj.compress(text[i * TEXT_RECORD_SIZE : (i * TEXT_RECORD_SIZE) + TEXT_RECORD_SIZE]) + zobj.flush())
return (size, pages) return (size, pages)

View File

@ -11,9 +11,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os
from calibre.ebooks.rtf2xml import copy, check_brackets from calibre.ebooks.rtf2xml import copy, check_brackets
from calibre.ptempfile import better_mktemp
class AddBrackets: class AddBrackets:
""" """
@ -38,7 +39,7 @@ class AddBrackets:
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):

View File

@ -10,8 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import os, tempfile import os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
""" """
Simply write the list of strings after style table Simply write the list of strings after style table
""" """
@ -42,7 +44,7 @@ class BodyStyles:
self.__copy = copy self.__copy = copy
self.__list_of_styles = list_of_styles self.__list_of_styles = list_of_styles
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
# self.__write_to = 'table_info.data' # self.__write_to = 'table_info.data'
def insert_info(self): def insert_info(self):
""" """

View File

@ -10,9 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile, re import sys, os, re
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
class Colors: class Colors:
""" """
@ -38,7 +39,7 @@ class Colors:
self.__copy = copy self.__copy = copy
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__line = 0 self.__line = 0
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):

View File

@ -10,9 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import os, tempfile import os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
class CombineBorders: class CombineBorders:
"""Combine borders in RTF tokens to make later processing easier""" """Combine borders in RTF tokens to make later processing easier"""
@ -25,7 +26,7 @@ class CombineBorders:
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__state = 'default' self.__state = 'default'
self.__bord_pos = 'default' self.__bord_pos = 'default'
self.__bord_att = [] self.__bord_att = []

View File

@ -1,7 +1,8 @@
import os, tempfile, sys import os, tempfile
from codecs import EncodedFile from codecs import EncodedFile
from calibre.ebooks.rtf2xml import copy, check_encoding from calibre.ebooks.rtf2xml import copy, check_encoding
from calibre.ptempfile import better_mktemp
public_dtd = 'rtf2xml1.0.dtd' public_dtd = 'rtf2xml1.0.dtd'
@ -39,7 +40,7 @@ class ConvertToTags:
self.__encoding = 'mac_roman' self.__encoding = 'mac_roman'
self.__indent = indent self.__indent = indent
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__convert_utf = False self.__convert_utf = False
self.__bad_encoding = False self.__bad_encoding = False

View File

@ -10,9 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
class DeleteInfo: class DeleteInfo:
"""Delete unecessary destination groups""" """Delete unecessary destination groups"""
@ -25,7 +26,7 @@ class DeleteInfo:
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__run_level = run_level self.__run_level = run_level
self.__initiate_allow() self.__initiate_allow()
self.__bracket_count= 0 self.__bracket_count= 0

View File

@ -10,8 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os
from calibre.ebooks.rtf2xml import field_strings, copy from calibre.ebooks.rtf2xml import field_strings, copy
from calibre.ptempfile import better_mktemp
class FieldsLarge: class FieldsLarge:
""" """
========================= =========================
@ -99,7 +101,7 @@ Examples
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__run_level = run_level self.__run_level = run_level
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
def __initiate_values(self): def __initiate_values(self):
""" """
Initiate all values. Initiate all values.

View File

@ -10,9 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile, re import sys, os, re
from calibre.ebooks.rtf2xml import field_strings, copy from calibre.ebooks.rtf2xml import field_strings, copy
from calibre.ptempfile import better_mktemp
class FieldsSmall: class FieldsSmall:
""" """
@ -50,7 +51,7 @@ file.
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):

View File

@ -10,9 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import sys, os, tempfile import sys, os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
class Fonts: class Fonts:
""" """
@ -40,7 +41,7 @@ class Fonts:
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__default_font_num = default_font_num self.__default_font_num = default_font_num
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__run_level = run_level self.__run_level = run_level
def __initiate_values(self): def __initiate_values(self):

View File

@ -10,9 +10,10 @@
# # # #
# # # #
######################################################################### #########################################################################
import os, tempfile import os
from calibre.ebooks.rtf2xml import copy from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
class Footnote: class Footnote:
""" """
@ -30,7 +31,7 @@ class Footnote:
self.__file = in_file self.__file = in_file
self.__bug_handler = bug_handler self.__bug_handler = bug_handler
self.__copy = copy self.__copy = copy
self.__write_to = tempfile.mktemp() self.__write_to = better_mktemp()
self.__found_a_footnote = 0 self.__found_a_footnote = 0
def __first_line_func(self, line): def __first_line_func(self, line):
@ -114,7 +115,7 @@ class Footnote:
bottom of the main file. bottom of the main file.
""" """
self.__initiate_sep_values() self.__initiate_sep_values()
self.__footnote_holder = tempfile.mktemp() self.__footnote_holder = better_mktemp()
with open(self.__file) as read_obj: with open(self.__file) as read_obj:
with open(self.__write_to, 'w') as self.__write_obj: with open(self.__write_to, 'w') as self.__write_obj:
with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj: with open(self.__footnote_holder, 'w') as self.__write_to_foot_obj:
@ -243,7 +244,7 @@ class Footnote:
""" """
if not self.__found_a_footnote: if not self.__found_a_footnote:
return return
self.__write_to2 = tempfile.mktemp() self.__write_to2 = better_mktemp()
self.__state = 'body' self.__state = 'body'
self.__get_footnotes() self.__get_footnotes()
self.__join_from_temp() self.__join_from_temp()

Some files were not shown because too many files have changed in this diff Show More