Sync to trunk.

John Schember 2011-10-24 06:57:50 -04:00
commit 88e9a7b626
134 changed files with 33969 additions and 27196 deletions


@ -19,6 +19,65 @@
# new recipes:
# - title:
- version: 0.8.23
  date: 2011-10-21

  new features:
    - title: "Drivers for T-Mobile Move, new Pandigital Novel, new Onyx Boox and Freescale MX 515"

    - title: "SONY T1 driver: Support for periodicals and better timezone detection"

    - title: "Add a remove cover entry to the right click menu of the cover display in the right panel"
      tickets: [874689]

  bug fixes:
    - title: "Amazon metadata download: Fix for a change in the Amazon website that broke downloading metadata."
      tickets: [878395]

    - title: "MOBI metadata: When reading titles from MOBI files, only use the title in the PDB header if there is no long title in the EXTH header"
      tickets: [875243]
      # (the title-fallback rule is sketched after this changelog)

    - title: "Fix regression that broke the use of complex custom columns in save to disk templates."
      tickets: [877366]

    - title: "Fix regression that broke reading metadata from CHM files"

    - title: "Fix a bug that broke conversion of some zipped up HTML files with non-ASCII filenames on certain Windows installs."
      tickets: [873288]

    - title: "RTF Input: Fix bug in handling of paragraph separators."
      tickets: [863735]

    - title: "Fix a regression that broke downloading certain periodicals for the Kindle."
      tickets: [875595]

    - title: "Fix regression that broke updating of covers inside ebook files when saving to disk"

    - title: "Fix regression that broke editing the 'show in tag browser' checkbox in custom column setup"

    - title: "Fix typo that broke stopping selected jobs in 0.8.22"

  improved recipes:
    - Columbus Dispatch
    - Ming Pao
    - La Repubblica
    - Korea Times
    - USA Today
    - CNN
    - Liberation
    - El Pais
    - Helsingin Sanomat

  new recipes:
    - title: Kyungyhang, Hankyoreh and Hankyoreh21
      author: Seongkyoun Yoo

    - title: English Kathimerini
      author: Thomas Scholl

    - title: Various French news sources
      author: Aurelien Chabot

- version: 0.8.22
  date: 2011-10-14
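
The MOBI title fix above boils down to a preference rule between two header fields. A minimal sketch of that rule (hypothetical helper names and structures, not calibre's actual reader code; EXTH record 503 conventionally holds the "updated title"):

    def choose_mobi_title(exth_records, pdb_name):
        # exth_records: dict mapping EXTH record type -> value.
        # pdb_name: the short, possibly truncated, name from the PDB header.
        long_title = exth_records.get(503)
        if long_title:
            # a long title in the EXTH header always wins
            return long_title
        # only fall back to the PDB header when no EXTH title exists
        return pdb_name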

recipes/20minutes.recipe (new file)

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'

'''
20minutes.fr
'''

from calibre.web.feeds.recipes import BasicNewsRecipe

class Minutes(BasicNewsRecipe):

    title = '20 minutes'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'cp1252'
    publisher = '20minutes.fr'
    category = 'Actualités, France, Monde'
    language = 'fr'
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
                h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                .mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                .mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                .mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['mn-section-heading']}),
        dict(name='a', attrs={'href':['#commentaires']}),
        dict(name='div', attrs={'class':['mn-right']}),
        dict(name='div', attrs={'class':['mna-box']}),
        dict(name='div', attrs={'class':['mna-comment-call']}),
        dict(name='div', attrs={'class':['mna-tools']}),
        dict(name='div', attrs={'class':['mn-trilist']})
    ]

    keep_only_tags = [dict(id='mn-article')]
    remove_tags_after = dict(name='div', attrs={'class':['mna-body','mna-signature']})

    feeds = [
        ('France', 'http://www.20minutes.fr/rss/actu-france.xml'),
        ('International', 'http://www.20minutes.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.20minutes.fr/rss/hightech.xml'),
        ('Sciences', 'http://www.20minutes.fr/rss/sciences.xml'),
        ('Economie', 'http://www.20minutes.fr/rss/economie.xml'),
        ('Politique', 'http://www.20minutes.fr/rss/politique.xml'),
        (u'Médias', 'http://www.20minutes.fr/rss/media.xml'),
        ('Cinema', 'http://www.20minutes.fr/rss/cinema.xml'),
        ('People', 'http://www.20minutes.fr/rss/people.xml'),
        ('Culture', 'http://www.20minutes.fr/rss/culture.xml'),
        ('Sport', 'http://www.20minutes.fr/rss/sport.xml'),
        ('Paris', 'http://www.20minutes.fr/rss/paris.xml'),
        ('Lyon', 'http://www.20minutes.fr/rss/lyon.xml'),
        ('Toulouse', 'http://www.20minutes.fr/rss/toulouse.xml')
    ]

    def preprocess_html(self, soup):
        # strip inline styles so the extra_css above takes effect
        for item in soup.findAll(style=True):
            del item['style']
        return soup
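
A recipe like this can be smoke-tested from the command line with calibre's converter, e.g. `ebook-convert 20minutes.recipe .epub --test`, which downloads only a couple of articles from the first feeds.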


@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe):
    #match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
    max_articles_per_feed = 25

    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
    '''

    preprocess_regexps = [
        (re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
        (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe):
    remove_tags = [
        {'class':['cnn_strybtntools', 'cnn_strylftcntnt',
            'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
            'cnn_strycntntrgt', 'hed_side', 'foot']},
            'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']},
        {'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none',
            'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']},
        {'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev',
            'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']},
        {'style':['display:none']},
        dict(id=['ie_column']),
    ]
@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe):
        ans = BasicNewsRecipe.get_article_url(self, article)
        return ans.partition('?')[0]

    def get_masthead_url(self):
        # probe the masthead URL; fall back to None so calibre uses its default
        masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
        br = self.get_browser()
        try:
            br.open(masthead)
        except:
            self.log("\nCover unavailable")
            masthead = None
        return masthead


@ -14,67 +14,43 @@ class ColumbusDispatchRecipe(BasicNewsRecipe):
    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 1.2
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Seems to work best, but YMMV
    simultaneous_downloads = 2
    auto_cleanup = True
    #auto_cleanup_keep = '//div[@id="story-photos"]'

    # Feeds from http://www.dispatch.com/live/content/rss/index.html
    feeds = []
    feeds.append((u'News: Local and state news', u'http://www.dispatch.com/live/static/crt/2_rss_localnews.xml'))
    feeds.append((u'News: National news', u'http://www.dispatch.com/live/static/crt/2_rss_nationalnews.xml'))
    feeds.append((u'News: Editorials', u'http://www.dispatch.com/live/static/crt/2_rss_editorials.xml'))
    feeds.append((u'News: Columnists', u'http://www.dispatch.com/live/static/crt/2_rss_columnists.xml'))
    feeds.append((u'News: Health news', u'http://www.dispatch.com/live/static/crt/2_rss_health.xml'))
    feeds.append((u'News: Science news', u'http://www.dispatch.com/live/static/crt/2_rss_science.xml'))
    feeds.append((u'Sports: OSU football', u'http://www.dispatch.com/live/static/crt/2_rss_osufootball.xml'))
    feeds.append((u'Sports: OSU men\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osumensbball.xml'))
    feeds.append((u'Sports: OSU women\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osuwomensbball.xml'))
    feeds.append((u'Sports: OSU sports', u'http://www.dispatch.com/live/static/crt/2_rss_osusports.xml'))
    feeds.append((u'Sports: Blue Jackets', u'http://www.dispatch.com/live/static/crt/2_rss_bluejackets.xml'))
    feeds.append((u'Sports: Crew', u'http://www.dispatch.com/live/static/crt/2_rss_crew.xml'))
    feeds.append((u'Sports: Clippers', u'http://www.dispatch.com/live/static/crt/2_rss_clippers.xml'))
    feeds.append((u'Sports: Indians', u'http://www.dispatch.com/live/static/crt/2_rss_indians.xml'))
    feeds.append((u'Sports: Reds', u'http://www.dispatch.com/live/static/crt/2_rss_reds.xml'))
    feeds.append((u'Sports: Golf', u'http://www.dispatch.com/live/static/crt/2_rss_golf.xml'))
    feeds.append((u'Sports: Outdoors', u'http://www.dispatch.com/live/static/crt/2_rss_outdoors.xml'))
    feeds.append((u'Sports: Cavs/NBA', u'http://www.dispatch.com/live/static/crt/2_rss_cavaliers.xml'))
    feeds.append((u'Sports: High Schools', u'http://www.dispatch.com/live/static/crt/2_rss_highschools.xml'))
    feeds.append((u'Sports: Browns', u'http://www.dispatch.com/live/static/crt/2_rss_browns.xml'))
    feeds.append((u'Sports: Bengals', u'http://www.dispatch.com/live/static/crt/2_rss_bengals.xml'))
    feeds.append((u'Sports: Auto Racing', u'http://www.dispatch.com/live/static/crt/2_rss_autoracing.xml'))
    feeds.append((u'Business News', u'http://www.dispatch.com/live/static/crt/2_rss_business.xml'))
    feeds.append((u'Features: Weekender', u'http://www.dispatch.com/live/static/crt/2_rss_weekender.xml'))
    feeds.append((u'Features: Life and Arts', u'http://www.dispatch.com/live/static/crt/2_rss_lifearts.xml'))
    feeds.append((u'Features: Food', u'http://www.dispatch.com/live/static/crt/2_rss_food.xml'))
    feeds.append((u'Features: NOW! for kids', u'http://www.dispatch.com/live/static/crt/2_rss_now.xml'))
    feeds.append((u'Features: Travel', u'http://www.dispatch.com/live/static/crt/2_rss_travel.xml'))
    feeds.append((u'Features: Home and Garden', u'http://www.dispatch.com/live/static/crt/2_rss_homegarden.xml'))
    feeds.append((u'Features: Faith and Values', u'http://www.dispatch.com/live/static/crt/2_rss_faithvalues.xml'))
    #feeds.append((u'', u''))

    feeds = [
        ('Local',
            'http://www.dispatch.com/content/syndication/news_local-state.xml'),
        ('National',
            'http://www.dispatch.com/content/syndication/news_national.xml'),
        ('Business',
            'http://www.dispatch.com/content/syndication/news_business.xml'),
        ('Editorials',
            'http://www.dispatch.com/content/syndication/opinion_editorials.xml'),
        ('Columnists',
            'http://www.dispatch.com/content/syndication/opinion_columns.xml'),
        ('Life and Arts',
            'http://www.dispatch.com/content/syndication/lae_life-and-arts.xml'),
        ('OSU Sports',
            'http://www.dispatch.com/content/syndication/sports_osu.xml'),
        ('Auto Racing',
            'http://www.dispatch.com/content/syndication/sports_auto-racing.xml'),
        ('Outdoors',
            'http://www.dispatch.com/content/syndication/sports_outdoors.xml'),
        ('Bengals',
            'http://www.dispatch.com/content/syndication/sports_bengals.xml'),
        ('Indians',
            'http://www.dispatch.com/content/syndication/sports_indians.xml'),
        ('Clippers',
            'http://www.dispatch.com/content/syndication/sports_clippers.xml'),
        ('Crew',
            'http://www.dispatch.com/content/syndication/sports_crew.xml'),
        ('Reds',
            'http://www.dispatch.com/content/syndication/sports_reds.xml'),
        ('Blue Jackets',
            'http://www.dispatch.com/content/syndication/sports_bluejackets.xml'),
    ]

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'colhed'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'hed'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subhed'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'date'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'byline'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'srcline'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'body'}))

    remove_tags = []
    remove_tags.append(dict(name = 'div', attrs = {'id': 'middle-story-ad-container'}))

    extra_css = '''
                body {font-family:verdana,arial,helvetica,geneva,sans-serif;}
                a {text-decoration: none; color: blue;}
                div.colhed {font-weight: bold;}
                div.hed {font-size: xx-large; font-weight: bold; margin-bottom: 0.2em;}
                div.subhed {font-size: large;}
                div.date {font-size: x-small; font-style: italic; color: #666666; margin-top: 0.4em; margin-bottom: 0.4em;}
                div.byline, div.srcline {font-size: small; color: #696969;}
                '''


@ -0,0 +1,58 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup

class Ekathimerini(BasicNewsRecipe):
    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    encoding = 'windows-1253'
    conversion_options = {'linearize_tables': True}
    no_stylesheets = True
    delay = 1
    keep_only_tags = [dict(name='td', attrs={'class':'news'})]

    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        for article in idx.findAll('item'):
            cat = u''
            cat_elem = article.find('subcat')
            if cat_elem:
                cat = self.tag_to_string(cat_elem)
            if cat == category:
                desc_html = self.tag_to_string(article.find('description'))
                description = self.tag_to_string(BeautifulSoup(desc_html))
                a = {
                    'title': self.tag_to_string(article.find('title')),
                    'url': self.tag_to_string(article.find('link')),
                    'description': description,
                    'date': self.tag_to_string(article.find('pubdate')),
                }
                yield a

    def parse_index(self):
        idx_contents = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        cats = list(set([self.tag_to_string(subcat) for subcat in idx.findAll('subcat')]))
        cats.sort()
        feeds = [(u'News', list(self.find_articles(idx, u'')))]
        for cat in cats:
            feeds.append((cat.capitalize(), list(self.find_articles(idx, cat))))
        return feeds

    def print_version(self, url):
        return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')


@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe):
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})]

    extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '

recipes/frandroid.recipe (new file)

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318572550(BasicNewsRecipe):
    title = u'FrAndroid'
    oldest_article = 2
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'FrAndroid', u'http://feeds.feedburner.com/Frandroid')]


@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318572445(BasicNewsRecipe):
    title = u'Google Mobile Blog'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml')]

recipes/hankyoreh.recipe (new file)

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Hankyoreh(BasicNewsRecipe):
    title = u'Hankyoreh'
    language = 'ko'
    description = u'The Hankyoreh News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 5
    recursions = 1
    max_articles_per_feed = 5
    no_stylesheets = True

    keep_only_tags = [
        dict(name='tr', attrs={'height':['60px']}),
        dict(id=['fontSzArea'])
    ]
    remove_tags = [
        dict(target='_blank'),
        dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width':['590']}),
    ]
    remove_tags_after = [
        dict(target='_top')
    ]

    feeds = [
        ('All News','http://www.hani.co.kr/rss/'),
        ('Politics','http://www.hani.co.kr/rss/politics/'),
        ('Economy','http://www.hani.co.kr/rss/economy/'),
        ('Society','http://www.hani.co.kr/rss/society/'),
        ('International','http://www.hani.co.kr/rss/international/'),
        ('Culture','http://www.hani.co.kr/rss/culture/'),
        ('Sports','http://www.hani.co.kr/rss/sports/'),
        ('Science','http://www.hani.co.kr/rss/science/'),
        ('Opinion','http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon','http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition','http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection','http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly','http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly','http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani','http://www.hani.co.kr/rss/multihani/'),
        ('Lead','http://www.hani.co.kr/rss/lead/'),
        ('Newsrank','http://www.hani.co.kr/rss/newsrank/'),
    ]


@ -0,0 +1,25 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Hankyoreh
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Hankyoreh21(BasicNewsRecipe):
    title = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 20
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name='font', attrs={'class':'t18bk'}),
        dict(id=['fontSzArea'])
    ]

    feeds = [
        ('Hani21','http://h21.hani.co.kr/rss/'),
    ]


@ -1,50 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.web.feeds.news import BasicNewsRecipe

class HunMilNews(BasicNewsRecipe):
    title = u'Honvedelem.hu'
    oldest_article = 3
    description = u'Katonah\xedrek'
    language = 'hu'
    lang = 'hu'
    encoding = 'windows-1250'
    category = 'news, military'
    no_stylesheets = True
    __author__ = 'Devilinside'
    max_articles_per_feed = 16
    no_stylesheets = True

    keep_only_tags = [dict(name='div', attrs={'class':'cikkoldal_cikk_cim'}),
                      dict(name='div', attrs={'class':'cikkoldal_cikk_alcim'}),
                      dict(name='div', attrs={'class':'cikkoldal_datum'}),
                      dict(name='div', attrs={'class':'cikkoldal_lead'}),
                      dict(name='div', attrs={'class':'cikkoldal_szoveg'}),
                      dict(name='img', attrs={'class':'ajanlo_kep_keretes'}),
                      ]

    feeds = [(u'Misszi\xf3k', u'http://www.honvedelem.hu/rss_b?c=22'),
             (u'Aktu\xe1lis hazai h\xedrek', u'http://www.honvedelem.hu/rss_b?c=3'),
             (u'K\xfclf\xf6ldi h\xedrek', u'http://www.honvedelem.hu/rss_b?c=4'),
             (u'A h\xf3nap t\xe9m\xe1ja', u'http://www.honvedelem.hu/rss_b?c=6'),
             (u'Riport', u'http://www.honvedelem.hu/rss_b?c=5'),
             (u'Portr\xe9k', u'http://www.honvedelem.hu/rss_b?c=7'),
             (u'Haditechnika', u'http://www.honvedelem.hu/rss_b?c=8'),
             (u'Programok, esem\xe9nyek', u'http://www.honvedelem.hu/rss_b?c=12')
             ]


@ -1,41 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.web.feeds.news import BasicNewsRecipe

class HunTechNet(BasicNewsRecipe):
    title = u'TechNet'
    oldest_article = 3
    description = u'Az ut\xf3bbi 3 nap TechNet h\xedrei'
    language = 'hu'
    lang = 'hu'
    encoding = 'utf-8'
    __author__ = 'Devilinside'
    max_articles_per_feed = 30
    timefmt = ' [%Y, %b %d, %a]'

    remove_tags_before = dict(name='div', attrs={'id':'c-main'})
    remove_tags = [dict(name='div', attrs={'class':'wrp clr'}),
                   {'class' : ['screenrdr','forum','print','startlap','text_small','text_normal','text_big','email']},
                   ]
    keep_only_tags = [dict(name='div', attrs={'class':'cikk_head box'}), dict(name='div', attrs={'class':'cikk_txt box'})]

    feeds = [(u'C\xedmlap',
              u'http://www.technet.hu/rss/cimoldal/'), (u'TechTud',
              u'http://www.technet.hu/rss/techtud/'), (u'PDA M\xe1nia',
              u'http://www.technet.hu/rss/pdamania/'), (u'Telefon',
              u'http://www.technet.hu/rss/telefon/'), (u'Sz\xe1m\xedt\xf3g\xe9p',
              u'http://www.technet.hu/rss/notebook/'), (u'GPS',
              u'http://www.technet.hu/rss/gps/')]


@ -44,7 +44,11 @@ class JapanTimes(BasicNewsRecipe):
return rurl.partition('?')[0]
def print_version(self, url):
return url.replace('/cgi-bin/','/print/')
if '/rss/' in url:
return url.replace('.jp/rss/','.jp/print/')
if '/text/' in url:
return url.replace('.jp/text/','.jp/print/')
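        # e.g. a hypothetical http://www.japantimes.co.jp/text/<story>.html
        # becomes http://www.japantimes.co.jp/print/<story>.html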
        return url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
recipes/korben.recipe (new file)

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318619728(BasicNewsRecipe):
    title = u'Korben'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')]

    def get_masthead_url(self):
        masthead = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png'
        br = self.get_browser()
        try:
            br.open(masthead)
        except:
            self.log("\nCover unavailable")
            masthead = None
        return masthead


@ -1,36 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
'''
Profile to download KoreaHerald
'''
from calibre.web.feeds.news import BasicNewsRecipe

class KoreaHerald(BasicNewsRecipe):
    title = u'KoreaHerald'
    language = 'en'
    description = u'Korea Herald News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 10
    recursions = 3
    max_articles_per_feed = 10
    no_stylesheets = True
    keep_only_tags = [
        dict(id=['contentLeft', '_article'])
    ]
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
    ]
    feeds = [
        ('All News','http://www.koreaherald.com/rss/020000000000.xml'),
        ('National','http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
    ]

__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
'''
Profile to download KoreaHerald
'''
from calibre.web.feeds.news import BasicNewsRecipe

class KoreaHerald(BasicNewsRecipe):
    title = u'KoreaHerald'
    language = 'en'
    description = u'Korea Herald News articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 15
    recursions = 3
    max_articles_per_feed = 15
    no_stylesheets = True
    keep_only_tags = [
        dict(id=['contentLeft', '_article'])
    ]
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
    ]
    feeds = [
        ('National','http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
    ]


@ -1,7 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'Kansascity Star'
    title = 'Kansas City Star'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'www.kansascity.com feed'

recipes/kyungyhang (new file)

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
'''
Profile to download The Kyungyhang
'''
from calibre.web.feeds.news import BasicNewsRecipe

class Kyungyhang(BasicNewsRecipe):
    title = u'Kyungyhang'
    language = 'ko'
    description = u'The Kyungyhang Shinmun articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 20
    recursions = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name='div', attrs={'class':['article_title_wrap']}),
        dict(name='div', attrs={'class':['article_txt']})
    ]
    remove_tags_after = dict(id='sub_bottom')
    remove_tags = [
        dict(name='iframe'),
        dict(id='TdHot'),
        dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}),
        dict(name='dl', attrs={'class':['CL']}),
        dict(name='ul', attrs={'class':['tab']}),
    ]

    feeds = [
        ('All News','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
    ]


@ -1,32 +1,37 @@
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'

'''
http://www.repubblica.it/
'''

import re
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe

class LaRepubblica(BasicNewsRecipe):
    title = 'La Repubblica'
    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
    description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
    masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
    publisher = 'Gruppo editoriale L\'Espresso'
    category = 'News, politics, culture, economy, general interest'
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 5
    encoding = 'utf8'
    use_embedded_content = False
    #recursion = 10
    no_stylesheets = True
    extra_css = """
                img{display: block}
                """
    title = 'La Repubblica'
    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
    description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
    masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
    publisher = 'Gruppo editoriale L\'Espresso'
    category = 'News, politics, culture, economy, general interest'
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 5
    encoding = 'utf8'
    use_embedded_content = False
    no_stylesheets = True
    publication_type = 'newspaper'
    articles_are_obfuscated = True
    temp_files = []
    extra_css = """
                img{display: block}
                """

    remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']

    preprocess_regexps = [
        (re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
@ -35,11 +40,28 @@ class LaRepubblica(BasicNewsRecipe):
    ]

    def get_article_url(self, article):
        link = article.get('id', article.get('guid', None))
        if link is None:
            return article
        return link
        link = BasicNewsRecipe.get_article_url(self, article)
        if link and not '.repubblica.it/' in link:
            link2 = article.get('id', article.get('guid', None))
            if link2:
                link = link2
        return link.rpartition('?')[0]

    def get_obfuscated_article(self, url):
        count = 0
        html = None  # keeps html defined even if every attempt below fails
        while (count < 10):
            try:
                response = self.browser.open(url)
                html = response.read()
                count = 10
            except:
                print "Retrying download..."
                count += 1
        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name

    keep_only_tags = [
        dict(attrs={'class':'articolo'}),
        dict(attrs={'class':'body-text'}),
@ -49,7 +71,7 @@ class LaRepubblica(BasicNewsRecipe):
    remove_tags = [
        dict(name=['object','link','meta']),
        dict(name=['object','link','meta','iframe','embed']),
        dict(name='span',attrs={'class':'linkindice'}),
        dict(name='div', attrs={'class':'bottom-mobile'}),
        dict(name='div', attrs={'id':['rssdiv','blocco']}),
@ -80,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe):
        (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(['hgroup','deresponsabilizzazione','per']):
            item.name = 'div'
            item.attrs = []
        for item in soup.findAll(style=True):
            del item['style']
        return soup

recipes/lepoint.recipe (new file)

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'

'''
LePoint.fr
'''

from calibre.web.feeds.recipes import BasicNewsRecipe

class lepoint(BasicNewsRecipe):

    title = 'Le Point'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'LePoint.fr'
    category = 'news, France, world'
    language = 'fr'
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
                h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
                .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['entete_chroniqueur']}),
        dict(name='div', attrs={'class':['col_article']}),
        dict(name='div', attrs={'class':['signature_article']}),
        dict(name='div', attrs={'class':['util_font util_article']}),
        dict(name='div', attrs={'class':['util_article bottom']})
    ]

    keep_only_tags = [dict(name='div', attrs={'class':['page_article']})]
    remove_tags_after = dict(name='div', attrs={'class':['util_article bottom']})

    feeds = [
        (u'À la une', 'http://www.lepoint.fr/rss.xml'),
        ('International', 'http://www.lepoint.fr/monde/rss.xml'),
        ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
        ('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        (u'Société', 'http://www.lepoint.fr/societe/rss.xml'),
        ('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
        (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
        ('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
        (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
        ('Sport', 'http://www.lepoint.fr/sport/rss.xml')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        masthead = 'http://www.lepoint.fr/images/commun/logo.png'
        br = self.get_browser()
        try:
            br.open(masthead)
        except:
            self.log("\nCover unavailable")
            masthead = None
        return masthead

recipes/lexpress.recipe (new file)

@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'

'''
Lexpress.fr
'''

from calibre.web.feeds.recipes import BasicNewsRecipe

class lepoint(BasicNewsRecipe):

    title = 'L\'express'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'cp1252'
    publisher = 'LExpress.fr'
    category = 'Actualité, France, Monde'
    language = 'fr'
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
                h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                .current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                #contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                .entete { font-weight:bold;}
                '''

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['barre-outil-fb']}),
        dict(name='div', attrs={'class':['barre-outils']}),
        dict(id='bloc-sommaire'),
        dict(id='footer-article')
    ]

    keep_only_tags = [dict(name='div', attrs={'class':['bloc-article']})]
    remove_tags_after = dict(id='content-article')

    feeds = [
        (u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'),
        ('International', 'http://www.lexpress.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'),
        (u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'),
        (u'Environnement', 'http://www.lexpress.fr/rss/environnement.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        (u'Société', 'http://www.lexpress.fr/rss/societe.xml'),
        ('Politique', 'http://www.lexpress.fr/rss/politique.xml'),
        (u'Médias', 'http://www.lexpress.fr/rss/medias.xml'),
        ('Culture', 'http://www.lexpress.fr/rss/culture.xml'),
        ('Sport', 'http://www.lexpress.fr/rss/sport.xml')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif'
        br = self.get_browser()
        try:
            br.open(masthead)
        except:
            self.log("\nCover unavailable")
            masthead = None
        return masthead


@ -9,39 +9,72 @@ liberation.fr
from calibre.web.feeds.news import BasicNewsRecipe

class Liberation(BasicNewsRecipe):

    title = u'Liberation'
    __author__ = 'Darko Miletic'
    description = 'News from France'
    language = 'fr'
    __author__ = 'calibre'
    description = 'Actualités'
    category = 'Actualités, France, Monde'
    language = 'fr'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True
    html2lrf_options = ['--base-font-size', '10']

    extra_css = '''
                h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
                h4, h5, h2.rubrique {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                .mna-body, .entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    keep_only_tags = [
        dict(name='h1')
        #,dict(name='div', attrs={'class':'object-content text text-item'})
        ,dict(name='div', attrs={'class':'article'})
        #,dict(name='div', attrs={'class':'articleContent'})
        ,dict(name='div', attrs={'class':'entry'})
    ]
    remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
        dict(name='div', attrs={'class':'article'})
        ,dict(name='div', attrs={'class':'text-article m-bot-s1'})
        ,dict(name='div', attrs={'class':'entry'})
        ,dict(name='div', attrs={'class':'col_contenu'})
    ]

    remove_tags_after = [
        dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
        ,dict(name='p',attrs={'class':['chapo']})
        ,dict(id='_twitter_facebook')
    ]

    remove_tags = [
        dict(name='p', attrs={'class':'clear'})
        ,dict(name='ul', attrs={'class':'floatLeft clear'})
        ,dict(name='div', attrs={'class':'clear floatRight'})
        ,dict(name='object')
        ,dict(name='div', attrs={'class':'toolbox'})
        ,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
        #,dict(name='div', attrs={'class':'clear block block-call-items'})
        ,dict(name='div', attrs={'class':'block-content'})
        dict(name='iframe')
        ,dict(name='a', attrs={'class':'lnk-comments'})
        ,dict(name='div', attrs={'class':'toolbox'})
        ,dict(name='ul', attrs={'class':'share-box'})
        ,dict(name='ul', attrs={'class':'tool-box'})
        ,dict(name='ul', attrs={'class':'rub'})
        ,dict(name='p',attrs={'class':['chapo']})
        ,dict(name='p',attrs={'class':['tag']})
        ,dict(name='div',attrs={'class':['blokLies']})
        ,dict(name='div',attrs={'class':['alire']})
        ,dict(id='_twitter_facebook')
    ]

    feeds = [
        (u'La une', u'http://www.liberation.fr/rss/laune')
        ,(u'Monde' , u'http://www.liberation.fr/rss/monde')
        ,(u'Sports', u'http://www.liberation.fr/rss/sports')
        (u'La une', u'http://rss.liberation.fr/rss/9/')
        ,(u'Monde' , u'http://www.liberation.fr/rss/10/')
        ,(u'Économie', u'http://www.liberation.fr/rss/13/')
        ,(u'Politiques', u'http://www.liberation.fr/rss/11/')
        ,(u'Société', u'http://www.liberation.fr/rss/12/')
        ,(u'Cinéma', u'http://www.liberation.fr/rss/58/')
        ,(u'Écran', u'http://www.liberation.fr/rss/53/')
        ,(u'Sports', u'http://www.liberation.fr/rss/12/')
    ]

    def get_masthead_url(self):
        masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
        br = self.get_browser()
        try:
            br.open(masthead)
        except:
            self.log("\nCover unavailable")
            masthead = None
        return masthead


@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
    publication_type = 'newspaper'
    delay = 1
    remove_empty_feeds = True
    cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg')
    cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg')
    masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
    extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
                    img{margin-bottom: 0.4em}


@ -4,26 +4,31 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Hong Kong'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to True if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) It is to disable the column section which is now a premium content
__InclCols__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
__ParsePFF__ = False
# (HK only) Turn below to True if you wish hi-res images
# (HK only) It is to disable premium content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''
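# e.g. setting __Date__ = '20111021' makes the recipe fetch the 2011-10-21
# edition (the YYYYMMDD string is sliced apart by the get_fetch* helpers below)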
'''
Change Log:
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@ -72,7 +77,7 @@ class MPRecipe(BasicNewsRecipe):
                          dict(attrs={'class':['content']}), # for content from txt
                          dict(attrs={'class':['photo']}),
                          dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
                          dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
                          dict(attrs={'class':['images']}) # for images from txt
                         ]
    if __KeepImages__:
@ -169,13 +174,22 @@ class MPRecipe(BasicNewsRecipe):
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        return self.get_dtlocal().strftime("%d")
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
@ -208,18 +222,21 @@ class MPRecipe(BasicNewsRecipe):
                               (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                               (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                              ]:
                articles = self.parse_section2(url, keystr)
                if __InclPremium__ == True:
                    articles = self.parse_section2_txt(url, keystr)
                else:
                    articles = self.parse_section2(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclCols__ == True:
            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
@ -253,10 +270,10 @@ class MPRecipe(BasicNewsRecipe):
            # feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
                articles = self.parse_section2(url, keystr)
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
            #    articles = self.parse_section(url)
@ -270,18 +287,18 @@ class MPRecipe(BasicNewsRecipe):
            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                      ]:
                articles = self.parse_section2(url, keystr)
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclCols__ == True:
            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
@ -333,7 +350,7 @@ class MPRecipe(BasicNewsRecipe):
                    url = 'http://news.mingpao.com/' + dateStr + '/' + url
                    # replace the url to the print-friendly version
                    if __ParsePFF__ == True:
                        if url.rfind('Redirect') <> -1:
                        if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
                            url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                            url = re.sub('%2F.*%2F', '/', url)
                            title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
@ -349,6 +366,8 @@ class MPRecipe(BasicNewsRecipe):
    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
@ -359,9 +378,13 @@ class MPRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
                try:
                    br.open_novisit(url)
                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                    current_articles.append({'title': title, 'url': url, 'description': ''})
                    included_urls.append(url)
                except:
                    print 'skipping a premium article'
        current_articles.reverse()
        return current_articles
@ -382,7 +405,7 @@ class MPRecipe(BasicNewsRecipe):
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    # parse from www.mingpaovan.com
    def parse_section3(self, url, baseUrl):
        self.get_fetchdate()
@ -467,53 +490,8 @@ class MPRecipe(BasicNewsRecipe):
    # preprocess those .txt and javascript based files
    def preprocess_raw_html(self, raw_html, url):
        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
            if url.rfind('news.mingpao.com') > -1:
                imglist = re.findall('src="?.*?jpg"', raw_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        raw_html = raw_html.replace(img, gifimg)
                    except:
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            raw_html = raw_html.replace(img, newimg)
                        else:
                            # if not found, insert _ after "
                            raw_html = raw_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
                imglist = re.findall('src=\'?.*?jpg\'', raw_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                #print 'Img list: ', imglist, '\n'
                for img in imglist:
                    gifimg = img.replace('jpg\'', 'gif\'')
                    try:
                        #print 'Original: ', url
                        #print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
                        gifurl = re.sub(r'dailynews.*txt', '', url)
                        #print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        #print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
                        #br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        raw_html = raw_html.replace(img, gifimg)
                    except:
                        #print 'GIF not found'
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'newimg: ', newimg
                        raw_html = raw_html.replace(img, newimg)
        if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
            return raw_html
        else:
            new_html = raw_html
        if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
            if url.rfind('_print.htm') <> -1:
                # javascript based file
                splitter = re.compile(r'\n')
@ -548,8 +526,8 @@ class MPRecipe(BasicNewsRecipe):
                        photo = photo.replace('</td>', '<br>')
                        photo = photo.replace('class="photo"', '')
                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                return new_raw_html + '</body></html>'
            else:
                new_html = new_raw_html + '</body></html>'
            else:
                # .txt based file
                splitter = re.compile(r'\n') # Match non-digits
                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
@ -557,28 +535,105 @@ class MPRecipe(BasicNewsRecipe):
                title_started = False
                met_article_start_char = False
                for item in splitter.split(raw_html):
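                    # Format of the .txt source, as handled below: a line starting
                    # with u'\u3010' begins the article text; lines starting with
                    # '=?', '==' or '=' reference images ('=@' marks movie links,
                    # which are skipped); plain lines seen before the article text
                    # are treated as the heading.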
                    item = item.strip()
                    if item.startswith(u'\u3010'):
                        met_article_start_char = True
                        new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                    else:
                        if next_is_img_txt == False:
                            if item.startswith('='):
                            if item.startswith("=@"):
                                print 'skip movie link'
                            elif item.startswith("=?"):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                                new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
                            elif item.startswith('=='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[2:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
                            elif item.startswith('='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[1:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                            else:
                                if met_article_start_char == False:
                                    if title_started == False:
                                        new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
                                        title_started = True
                                    else:
                                        new_raw_html = new_raw_html + item + '\n'
                                if next_is_img_txt == False and met_article_start_char == False:
                                    if item <> '':
                                        if title_started == False:
                                            #print 'Title started at ', item
                                            new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
                                            title_started = True
                                        else:
                                            new_raw_html = new_raw_html + item + '\n'
                                else:
                                    new_raw_html = new_raw_html + item + '<p>\n'
                        else:
                            next_is_img_txt = False
                            new_raw_html = new_raw_html + item + '\n'
                return new_raw_html + '</div></body></html>'
                new_html = new_raw_html + '</div></body></html>'

        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
            if url.rfind('news.mingpao.com') > -1:
                imglist = re.findall('src="?.*?jpg"', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            new_html = new_html.replace(img, newimg)
                        else:
                            # if not found, insert _ after "
                            new_html = new_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
                imglist = re.findall('src=\'?.*?jpg\'', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                #print 'Img list: ', imglist, '\n'
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg\'', 'gif\'')
                    try:
                        gifurl = re.sub(r'dailynews.*txt', '', url)
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        new_html = new_html.replace(img, newimg)
                # repeat with src quoted by double quotes, for text parsed from src txt
                imglist = re.findall('src="?.*?jpg"', new_html)
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        #print 'url', url
                        pos = url.rfind('/')
                        gifurl = url[:pos+1]
                        #print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.find('"')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'Use hi-res img', newimg
                        new_html = new_html.replace(img, newimg)
        return new_html

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
@ -587,7 +642,7 @@ class MPRecipe(BasicNewsRecipe):
        for item in soup.findAll(stype=True):
            del item['absmiddle']
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
@ -678,7 +733,7 @@ class MPRecipe(BasicNewsRecipe):
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                    play_order=po, author=auth, description=desc)
                last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                for sp in a.sub_pages:

recipes/omgubuntu.recipe (new file)

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1318619832(BasicNewsRecipe):
    title = u'OmgUbuntu'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')]

    def get_masthead_url(self):
        masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png'
        br = self.get_browser()
        try:
            br.open(masthead)
        except:
            self.log("\nCover unavailable")
            masthead = None
        return masthead

recipes/phoronix.recipe (new file)

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'

'''
Fetch phoronix.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class cdnet(BasicNewsRecipe):

    title = 'Phoronix'
    __author__ = 'calibre'
    description = 'Actualités Phoronix'
    encoding = 'utf-8'
    publisher = 'Phoronix.com'
    category = 'news, IT, linux'
    language = 'en'
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 25
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
                h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                .KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    remove_tags = []
    remove_tags_before = dict(id='phxcms_content_phx')
    remove_tags_after = dict(name='div', attrs={'class':'KonaBody'})

    feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup


@ -1,37 +1,64 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
'''
thescotsman.scotsman.com
www.scotsman.com/the-scotsman
'''

from calibre.web.feeds.news import BasicNewsRecipe

class TheScotsman(BasicNewsRecipe):
    title = u'The Scotsman'
    title = 'The Scotsman'
    __author__ = 'Darko Miletic'
    description = 'News from Scotland'
    oldest_article = 7
    publisher = 'Johnston Publishing Ltd.'
    category = 'news, politics, Scotland, UK'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en_GB'
    simultaneous_downloads = 1
    keep_only_tags = [dict(name='div', attrs={'id':'viewarticle'})]
    remove_tags = [
        dict(name='div', attrs={'class':'viewarticlepanel'})
    ]
    language = 'en_GB'
    encoding = 'utf-8'
    publication_type = 'newspaper'
    remove_empty_feeds = True
    masthead_url = 'http://www.scotsman.com/webimage/swts_thescotsman_image_e_7_25526!image/3142543874.png_gen/derivatives/default/3142543874.png'
    extra_css = 'body{font-family: Arial,Helvetica,sans-serif}'

    keep_only_tags = [dict(attrs={'class':'editorialSection'})]
    remove_tags_after = dict(attrs={'class':'socialBookmarkPanel'})
    remove_tags = [
        dict(name=['meta','iframe','object','embed','link']),
        dict(attrs={'class':['secondaryArticlesNav','socialBookmarkPanel']}),
        dict(attrs={'id':'relatedArticles'})
    ]
    remove_attributes = ['lang']

    conversion_options = {
        'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    feeds = [
        (u'Latest National News', u'http://thescotsman.scotsman.com/getFeed.aspx?Format=rss&sectionid=4068'),
        ('UK', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7071&format=rss'),
        ('Scotland', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7042&format=rss'),
        ('International', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7000&format=rss'),
        ('Politics', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6990&format=rss'),
        ('Entertainment', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7010&format=rss'),
        ('Features', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6996&format=rss'),
        ('Opinion', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7074&format=rss'),
        ('Latest News' , 'http://www.scotsman.com/cmlink/1.957140' ),
        ('UK'           , 'http://www.scotsman.com/cmlink/1.957142' ),
        ('Scotland'     , 'http://www.scotsman.com/cmlink/1.957141' ),
        ('International', 'http://www.scotsman.com/cmlink/1.957143' ),
        ('Politics'     , 'http://www.scotsman.com/cmlink/1.957044' ),
        ('Arts'         , 'http://www.scotsman.com/cmlink/1.1804825'),
        ('Entertainment', 'http://www.scotsman.com/cmlink/1.957053' ),
        ('Sports'       , 'http://www.scotsman.com/cmlink/1.957151' ),
        ('Business'     , 'http://www.scotsman.com/cmlink/1.957156' ),
        ('Features'     , 'http://www.scotsman.com/cmlink/1.957149' ),
        ('Opinion'      , 'http://www.scotsman.com/cmlink/1.957054' )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

View File

@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
class USAToday(BasicNewsRecipe):
title = 'USA Today'
__author__ = 'Kovid Goyal'
oldest_article = 1
publication_type = 'newspaper'
timefmt = ''
max_articles_per_feed = 20
language = 'en'
no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \
.byline {font-family: monospace; \
text-align: left; \
margin-bottom: 1em;}\n \
.image {text-align: center;}\n \
.caption {text-align: center; \
font-size: smaller; \
font-style: italic}\n \
.credit {text-align: right; \
margin-bottom: 0em; \
font-size: smaller;}\n \
.articleBody {text-align: left;}\n '
#simultaneous_downloads = 1
title = 'USA Today'
__author__ = 'calibre'
description = 'newspaper'
encoding = 'utf-8'
publisher = 'usatoday.com'
category = 'news, usa'
language = 'en'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 15
no_stylesheets = True
remove_empty_feeds = True
filterDuplicates = True
extra_css = '''
h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
#post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
#post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe):
('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
]
keep_only_tags = [dict(attrs={'class':'story'})]
remove_tags = [
dict(attrs={'class':[
'share',
'reprints',
'inline-h3',
'info-extras',
'info-extras rounded',
'inset',
'ppy-outer',
'ppy-caption',
'comments',
@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe):
'tags',
'bottom-tools',
'sponsoredlinks',
'corrections'
]}),
dict(name='ul', attrs={'class':'inside-copy'}),
dict(id=['pluck']),
]
dict(id=['updated']),
dict(id=['post-date-updated'])
]
def get_masthead_url(self):

68 recipes/zdnet.fr.recipe Normal file
View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
'''
Fetch zdnet.fr
'''
from calibre.web.feeds.news import BasicNewsRecipe
class zdnet(BasicNewsRecipe):
title = 'ZDNet.fr'
__author__ = 'calibre'
description = 'Actualités'
encoding = 'utf-8'
publisher = 'ZDNet.fr'
category = 'Actualité, Informatique, IT'
language = 'fr'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 15
no_stylesheets = True
remove_empty_feeds = True
filterDuplicates = True
extra_css = '''
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
.contentmetadata p {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
#content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
'''
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':['toolbox']}),
dict(name='div', attrs={'class':['clear clearfix']}),
dict(id='emailtoafriend'),
dict(id='storyaudio'),
dict(id='fbtwContainer'),
dict(name='h5')
]
remove_tags_before = dict(id='leftcol')
remove_tags_after = dict(id='content')
feeds = [
('Informatique', 'http://www.zdnet.fr/feeds/rss/actualites/informatique/'),
('Internet', 'http://www.zdnet.fr/feeds/rss/actualites/internet/'),
('Telecom', 'http://www.zdnet.fr/feeds/rss/actualites/telecoms/')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def get_masthead_url(self):
masthead = 'http://www.zdnet.fr/images/base/logo.png'
br = BasicNewsRecipe.get_browser(self)
try:
br.open(masthead)
except:
self.log("\nCover unavailable")
masthead = None
return masthead

View File

@ -1,7 +1,7 @@
<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:html="http://www.w3.org/1999/xhtml"
xmlns="http://www.w3.org/1999/xhtml"
xmlns:rtf="http://rtf2xml.sourceforge.net/"
xmlns:c="calibre"
extension-element-prefixes="c"
@ -63,11 +63,16 @@
</xsl:template>
<xsl:template name = "para">
<xsl:if test = "normalize-space(.) or child::*">
<xsl:element name = "p">
<xsl:call-template name = "para-content"/>
</xsl:element>
</xsl:if>
<xsl:element name = "p">
<xsl:choose>
<xsl:when test = "normalize-space(.) or child::*">
<xsl:call-template name = "para-content"/>
</xsl:when>
<xsl:otherwise>
<xsl:text>&#160;</xsl:text>
</xsl:otherwise>
</xsl:choose>
</xsl:element>
</xsl:template>
<xsl:template name = "para_off">
@ -149,7 +154,7 @@
<xsl:template match="rtf:doc-information" mode="header">
<link rel="stylesheet" type="text/css" href="styles.css"/>
<xsl:if test="not(rtf:title)">
<title>unamed</title>
<title>unnamed</title>
</xsl:if>
<xsl:apply-templates/>
</xsl:template>
@ -445,7 +450,10 @@
<xsl:template match = "rtf:field[@type='hyperlink']">
<xsl:element name ="a">
<xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute>
<xsl:attribute name = "href">
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
<xsl:value-of select = "@link"/>
</xsl:attribute>
<xsl:apply-templates/>
</xsl:element>
</xsl:template>

View File

@ -9,49 +9,49 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-09-27 14:31+0000\n"
"PO-Revision-Date: 2011-09-27 18:23+0000\n"
"Last-Translator: Kovid Goyal <Unknown>\n"
"PO-Revision-Date: 2011-10-15 17:29+0000\n"
"Last-Translator: Devilinside <Unknown>\n"
"Language-Team: Hungarian <debian-l10n-hungarian@lists.d.o>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-09-28 04:50+0000\n"
"X-Generator: Launchpad (build 14049)\n"
"X-Launchpad-Export-Date: 2011-10-16 05:14+0000\n"
"X-Generator: Launchpad (build 14124)\n"
"X-Poedit-Country: HUNGARY\n"
"Language: hu\n"
"X-Poedit-Language: Hungarian\n"
#. name for aaa
msgid "Ghotuo"
msgstr ""
msgstr "Ghotuo"
#. name for aab
msgid "Alumu-Tesu"
msgstr ""
msgstr "Alumu-Tesu"
#. name for aac
msgid "Ari"
msgstr ""
msgstr "Ari"
#. name for aad
msgid "Amal"
msgstr ""
msgstr "Amal"
#. name for aae
msgid "Albanian; Arbëreshë"
msgstr ""
msgstr "Albán; Arbëreshë"
#. name for aaf
msgid "Aranadan"
msgstr ""
msgstr "Aranadan"
#. name for aag
msgid "Ambrak"
msgstr ""
msgstr "Ambrak"
#. name for aah
msgid "Arapesh; Abu'"
msgstr ""
msgstr "Arapesh; Abu'"
#. name for aai
msgid "Arifama-Miniafia"
@ -75,7 +75,7 @@ msgstr ""
#. name for aao
msgid "Arabic; Algerian Saharan"
msgstr ""
msgstr "Arab; Algériai Szaharai"
#. name for aap
msgid "Arára; Pará"
@ -87,7 +87,7 @@ msgstr ""
#. name for aar
msgid "Afar"
msgstr "afar"
msgstr "Afar"
#. name for aas
msgid "Aasáx"
@ -498,10 +498,9 @@ msgstr ""
msgid "Tapei"
msgstr ""
# src/trans.h:281 src/trans.h:318
#. name for afr
msgid "Afrikaans"
msgstr "afrikaans"
msgstr "Afrikaans"
#. name for afs
msgid "Creole; Afro-Seminole"
@ -801,7 +800,7 @@ msgstr ""
#. name for aka
msgid "Akan"
msgstr "akan"
msgstr "Akan"
#. name for akb
msgid "Batak Angkola"
@ -1015,10 +1014,9 @@ msgstr ""
msgid "Amarag"
msgstr ""
# src/trans.h:283
#. name for amh
msgid "Amharic"
msgstr "amhara"
msgstr "Amhara"
#. name for ami
msgid "Amis"
@ -1425,10 +1423,9 @@ msgstr ""
msgid "Arrarnta; Western"
msgstr ""
# src/trans.h:294
#. name for arg
msgid "Aragonese"
msgstr "aragóniai"
msgstr "Aragóniai"
#. name for arh
msgid "Arhuaco"
@ -1548,7 +1545,7 @@ msgstr ""
#. name for asm
msgid "Assamese"
msgstr "asszámi"
msgstr "Asszámi"
#. name for asn
msgid "Asuriní; Xingú"
@ -1790,10 +1787,9 @@ msgstr ""
msgid "Arabic; Uzbeki"
msgstr ""
# src/trans.h:283
#. name for ava
msgid "Avaric"
msgstr "avar"
msgstr "Avar"
#. name for avb
msgid "Avau"
@ -1805,7 +1801,7 @@ msgstr ""
#. name for ave
msgid "Avestan"
msgstr "aveszti"
msgstr "Avesztai"
#. name for avi
msgid "Avikam"
@ -1945,7 +1941,7 @@ msgstr ""
#. name for ayc
msgid "Aymara; Southern"
msgstr ""
msgstr "Ajmara; Déli"
#. name for ayd
msgid "Ayabadhu"
@ -1977,7 +1973,7 @@ msgstr ""
#. name for aym
msgid "Aymara"
msgstr "aymara"
msgstr "Ajmara"
#. name for ayn
msgid "Arabic; Sanaani"
@ -1997,7 +1993,7 @@ msgstr ""
#. name for ayr
msgid "Aymara; Central"
msgstr ""
msgstr "Ajmara; Közép"
#. name for ays
msgid "Ayta; Sorsogon"
@ -2025,12 +2021,11 @@ msgstr ""
#. name for azb
msgid "Azerbaijani; South"
msgstr ""
msgstr "Azeri; Déli"
# src/trans.h:311
#. name for aze
msgid "Azerbaijani"
msgstr "azeri"
msgstr "Azeri"
#. name for azg
msgid "Amuzgo; San Pedro Amuzgos"
@ -2038,7 +2033,7 @@ msgstr ""
#. name for azj
msgid "Azerbaijani; North"
msgstr ""
msgstr "Azeri; Északi"
#. name for azm
msgid "Amuzgo; Ipalapa"
@ -2090,7 +2085,7 @@ msgstr ""
#. name for bak
msgid "Bashkir"
msgstr "baskír"
msgstr "Baskír"
#. name for bal
msgid "Baluchi"
@ -2115,7 +2110,7 @@ msgstr ""
#. name for bar
msgid "Bavarian"
msgstr ""
msgstr "Bajor"
#. name for bas
msgid "Basa (Cameroon)"
@ -2497,10 +2492,9 @@ msgstr "beja"
msgid "Bebeli"
msgstr ""
# src/trans.h:286
#. name for bel
msgid "Belarusian"
msgstr "belorusz"
msgstr "Belarusz"
#. name for bem
msgid "Bemba (Zambia)"
@ -2508,7 +2502,7 @@ msgstr ""
#. name for ben
msgid "Bengali"
msgstr "bengáli"
msgstr "Bengáli"
#. name for beo
msgid "Beami"
@ -3510,10 +3504,9 @@ msgstr ""
msgid "Borôro"
msgstr ""
# src/trans.h:309
#. name for bos
msgid "Bosnian"
msgstr "bosnyák"
msgstr "Bosnyák"
#. name for bot
msgid "Bongo"
@ -3685,7 +3678,7 @@ msgstr ""
#. name for bqn
msgid "Bulgarian Sign Language"
msgstr ""
msgstr "Bolgár jelnyelv"
#. name for bqo
msgid "Balo"
@ -4078,10 +4071,9 @@ msgstr ""
msgid "Bugawac"
msgstr ""
# src/trans.h:285
#. name for bul
msgid "Bulgarian"
msgstr "bolgár"
msgstr "Bolgár"
#. name for bum
msgid "Bulu (Cameroon)"
@ -7445,10 +7437,9 @@ msgstr ""
msgid "Semimi"
msgstr ""
# src/trans.h:284
#. name for eus
msgid "Basque"
msgstr "baszk"
msgstr "Baszk"
#. name for eve
msgid "Even"
@ -7534,10 +7525,9 @@ msgstr ""
msgid "Fang (Equatorial Guinea)"
msgstr ""
# src/trans.h:294
#. name for fao
msgid "Faroese"
msgstr "feröi"
msgstr "Feröeri"
#. name for fap
msgid "Palor"
@ -29414,7 +29404,7 @@ msgstr ""
#. name for xzp
msgid "Zapotec; Ancient"
msgstr ""
msgstr "Zapoték; Ősi"
#. name for yaa
msgid "Yaminahua"
@ -30326,27 +30316,27 @@ msgstr ""
#. name for zaa
msgid "Zapotec; Sierra de Juárez"
msgstr ""
msgstr "Zapoték; Sierra de Juárezi"
#. name for zab
msgid "Zapotec; San Juan Guelavía"
msgstr ""
msgstr "Zapoték; San Juan Guelavíai"
#. name for zac
msgid "Zapotec; Ocotlán"
msgstr ""
msgstr "Zapoték; Ocotláni"
#. name for zad
msgid "Zapotec; Cajonos"
msgstr "zapoték; Cajonos"
msgstr "Zapoték; Cajonesi"
#. name for zae
msgid "Zapotec; Yareni"
msgstr "zapoték; Yareni"
msgstr "Zapoték; Yareni"
#. name for zaf
msgid "Zapotec; Ayoquesco"
msgstr ""
msgstr "Zapoték; Ayoquescoi"
#. name for zag
msgid "Zaghawa"
@ -30358,7 +30348,7 @@ msgstr "zangval"
#. name for zai
msgid "Zapotec; Isthmus"
msgstr "zapoték; Isthmus"
msgstr "Zapoték; Isthmusi"
#. name for zaj
msgid "Zaramo"
@ -30374,31 +30364,31 @@ msgstr "zozu"
#. name for zam
msgid "Zapotec; Miahuatlán"
msgstr ""
msgstr "Zapoték; Miahuatláni"
#. name for zao
msgid "Zapotec; Ozolotepec"
msgstr ""
msgstr "Zapoték; Ozolotepeci"
#. name for zap
msgid "Zapotec"
msgstr "zapoték"
msgstr "Zapoték"
#. name for zaq
msgid "Zapotec; Aloápam"
msgstr ""
msgstr "Zapoték; Aloápami"
#. name for zar
msgid "Zapotec; Rincón"
msgstr "zapoték; Rincón"
msgstr "Zapoték; Rincóni"
#. name for zas
msgid "Zapotec; Santo Domingo Albarradas"
msgstr ""
msgstr "Zapoték; Santo Domingo Albarradasi"
#. name for zat
msgid "Zapotec; Tabaa"
msgstr "zapoték; Tabaa"
msgstr "Zapoték; Tabaa-i"
# src/trans.h:193
#. name for zau
@ -30407,15 +30397,15 @@ msgstr "zangskari"
#. name for zav
msgid "Zapotec; Yatzachi"
msgstr ""
msgstr "Zapoték; Yatzachi-i"
#. name for zaw
msgid "Zapotec; Mitla"
msgstr "zapoték; Mitla"
msgstr "Zapoték; Mitlai"
#. name for zax
msgid "Zapotec; Xadani"
msgstr "zapoték; Xadani"
msgstr "Zapoték; Xadani-i"
#. name for zay
msgid "Zayse-Zergulla"
@ -30991,7 +30981,7 @@ msgstr "tokano"
#. name for zul
msgid "Zulu"
msgstr "zulu"
msgstr "Zulu"
# src/trans.h:316
#. name for zum

View File

@ -206,7 +206,7 @@ class Resources(Command):
function_dict = {}
import inspect
from calibre.utils.formatter_functions import formatter_functions
for obj in formatter_functions.get_builtins().values():
for obj in formatter_functions().get_builtins().values():
eval_func = inspect.getmembers(obj,
lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
try:

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 22)
numeric_version = (0, 8, 23)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -341,7 +341,9 @@ class DB(object):
if f['is_custom']]
for f in fmvals:
self.create_custom_column(f['label'], f['name'],
f['datatype'], f['is_multiple'] is not None,
f['datatype'],
(f['is_multiple'] is not None and
len(f['is_multiple']) > 0),
f['is_editable'], f['display'])
defs = self.prefs.defaults

View File

@ -49,6 +49,15 @@ class ANDROID(USBMS):
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
0x70c6 : [0x226]
},
# Freescale
0x15a2 : {
0x0c01 : [0x226]
},
# Alcatel
0x05c6 : {
0x9018 : [0x0226],
},
# Sony Ericsson
0xfce : {
@ -68,7 +77,7 @@ class ANDROID(USBMS):
# Samsung
0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400],
0x681c : [0x0222, 0x0224, 0x0400],
0x681c : [0x0222, 0x0223, 0x0224, 0x0400],
0x6640 : [0x0100],
0x685b : [0x0400],
0x685e : [0x0400],
@ -139,7 +148,8 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -150,7 +160,7 @@ class ANDROID(USBMS):
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -62,7 +62,7 @@ class DevicePlugin(Plugin):
#: Icon for this device
icon = I('reader.png')
# Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
# Encapsulates an annotation fetched from the device
UserAnnotation = namedtuple('Annotation','type, value')
#: GUI displays this as a message if not None. Useful if opening can take a

View File

@ -13,6 +13,8 @@ import datetime, os, re, sys, json, hashlib
from calibre.devices.kindle.apnx import APNXBuilder
from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS
from calibre.ebooks.metadata import MetaInformation
from calibre import strftime
'''
Notes on collections:
@ -164,6 +166,121 @@ class KINDLE(USBMS):
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
return bookmarked_books
def generate_annotation_html(self, bookmark):
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
# Returns <div class="user_annotations"> ... </div>
last_read_location = bookmark.last_read_location
timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
percent_read = bookmark.percent_read
ka_soup = BeautifulSoup()
dtc = 0
divTag = Tag(ka_soup,'div')
divTag['class'] = 'user_annotations'
# Add the last-read location
spanTag = Tag(ka_soup, 'span')
spanTag['style'] = 'font-weight:bold'
if bookmark.book_format == 'pdf':
spanTag.insert(0,NavigableString(
_("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)") % \
dict(time=strftime(u'%x', timestamp.timetuple()),
loc=last_read_location,
pr=percent_read)))
else:
spanTag.insert(0,NavigableString(
_("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)") % \
dict(time=strftime(u'%x', timestamp.timetuple()),
loc=last_read_location,
pr=percent_read)))
divTag.insert(dtc, spanTag)
dtc += 1
divTag.insert(dtc, Tag(ka_soup,'br'))
dtc += 1
if bookmark.user_notes:
user_notes = bookmark.user_notes
annotations = []
# Add the annotations sorted by location
# Italicize highlighted text
for location in sorted(user_notes):
if user_notes[location]['text']:
annotations.append(
_('<b>Location %(dl)d &bull; %(typ)s</b><br />%(text)s<br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type'],
text=(user_notes[location]['text'] if \
user_notes[location]['type'] == 'Note' else \
'<i>%s</i>' % user_notes[location]['text'])))
else:
if bookmark.book_format == 'pdf':
annotations.append(
_('<b>Page %(dl)d &bull; %(typ)s</b><br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type']))
else:
annotations.append(
_('<b>Location %(dl)d &bull; %(typ)s</b><br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type']))
for annotation in annotations:
divTag.insert(dtc, annotation)
dtc += 1
ka_soup.insert(0,divTag)
return ka_soup
def add_annotation_to_library(self, db, db_id, annotation):
from calibre.ebooks.BeautifulSoup import Tag
bm = annotation
ignore_tags = set(['Catalog', 'Clippings'])
if bm.type == 'kindle_bookmark':
mi = db.get_metadata(db_id, index_is_id=True)
user_notes_soup = self.generate_annotation_html(bm.value)
if mi.comments:
a_offset = mi.comments.find('<div class="user_annotations">')
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
if a_offset >= 0:
mi.comments = mi.comments[:a_offset]
if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset]
if set(mi.tags).intersection(ignore_tags):
return
if mi.comments:
hrTag = Tag(user_notes_soup,'hr')
hrTag['class'] = 'annotations_divider'
user_notes_soup.insert(0, hrTag)
mi.comments += unicode(user_notes_soup.prettify())
else:
mi.comments = unicode(user_notes_soup.prettify())
# Update library comments
db.set_comment(db_id, mi.comments)
# Add bookmark file to db_id
db.add_format_with_hooks(db_id, bm.value.bookmark_extension,
bm.value.path, index_is_id=True)
elif bm.type == 'kindle_clippings':
# Find 'My Clippings' author=Kindle in database, or add
last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
mc_id = list(db.data.search_getting_ids('title:"My Clippings"', ''))
if mc_id:
db.add_format_with_hooks(mc_id[0], 'TXT', bm.value['path'],
index_is_id=True)
mi = db.get_metadata(mc_id[0], index_is_id=True)
mi.comments = last_update
db.set_metadata(mc_id[0], mi)
else:
mi = MetaInformation('My Clippings', authors = ['Kindle'])
mi.tags = ['Clippings']
mi.comments = last_update
db.add_books([bm.value['path']], ['txt'], [mi])
class KINDLE2(KINDLE):
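The merge strategy above first strips any annotations block that a previous sync appended to the book's comments, then appends the freshly generated one. A minimal standalone sketch of that idea (hypothetical helper; the real method works on a BeautifulSoup tree and skips books tagged Catalog or Clippings):

ANNOTATIONS_MARKER = '<div class="user_annotations">'
DIVIDER_MARKER = '<hr class="annotations_divider" />'

def merge_annotations(comments, new_block):
    # Drop whatever an earlier merge appended, so annotations are not duplicated
    for marker in (ANNOTATIONS_MARKER, DIVIDER_MARKER):
        pos = comments.find(marker)
        if pos >= 0:
            comments = comments[:pos]
    if comments:
        # Keep the user's own comments, separated by a divider
        return comments + DIVIDER_MARKER + new_block
    return new_block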

View File

@ -16,6 +16,7 @@ from calibre.devices.usbms.driver import USBMS, debug_print
from calibre import prints
from calibre.devices.usbms.books import CollectionsBookList
from calibre.utils.magick.draw import save_cover_data_to
from calibre.ptempfile import PersistentTemporaryFile
class KOBO(USBMS):
@ -76,6 +77,11 @@ class KOBO(USBMS):
self.book_class = Book
self.dbversion = 7
def create_annotations_path(self, mdata, device_path=None):
if device_path:
return device_path
return USBMS.create_annotations_path(self, mdata)
def books(self, oncard=None, end_session=True):
from calibre.ebooks.metadata.meta import path_to_ext
@ -370,7 +376,7 @@ class KOBO(USBMS):
path_prefix = '.kobo/images/'
path = self._main_prefix + path_prefix + ImageID
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed',)
file_endings = (' - iPhoneThumbnail.parsed', ' - bbMediumGridList.parsed', ' - NickelBookCover.parsed', ' - N3_LIBRARY_FULL.parsed', ' - N3_LIBRARY_GRID.parsed', ' - N3_LIBRARY_LIST.parsed', ' - N3_SOCIAL_CURRENTREAD.parsed', ' - N3_FULL.parsed',)
for ending in file_endings:
fpath = path + ending
@ -750,9 +756,12 @@ class KOBO(USBMS):
blists = {}
for i in paths:
if booklists[i] is not None:
#debug_print('Booklist: ', i)
blists[i] = booklists[i]
try:
if booklists[i] is not None:
#debug_print('Booklist: ', i)
blists[i] = booklists[i]
except IndexError:
pass
opts = self.settings()
if opts.extra_customization:
collections = [x.lower().strip() for x in
@ -843,6 +852,7 @@ class KOBO(USBMS):
' - N3_LIBRARY_FULL.parsed':(355,530),
' - N3_LIBRARY_GRID.parsed':(149,233),
' - N3_LIBRARY_LIST.parsed':(60,90),
' - N3_FULL.parsed':(600,800),
' - N3_SOCIAL_CURRENTREAD.parsed':(120,186)}
for ending, resize in file_endings.items():
@ -865,3 +875,21 @@ class KOBO(USBMS):
else:
debug_print("ImageID could not be retreived from the database")
def prepare_addable_books(self, paths):
'''
The Kobo supports an encrypted epub referred to as a kepub.
Unfortunately, Kobo decided to put the files on the device
with no file extension. I just hope that decision causes
them as much grief as it does me :-)
This has to make a temporary copy of the book files with an
epub extension to allow calibre's normal processing to
deal with the file appropriately.
'''
for idx, path in enumerate(paths):
if path.find('kepub') >= 0:
with closing(open(path, 'rb')) as r: # read binary; a kepub is zip data
tf = PersistentTemporaryFile(suffix='.epub')
tf.write(r.read())
paths[idx] = tf.name
return paths

View File

@ -207,8 +207,11 @@ class PRS505(USBMS):
c = self.initialize_XML_cache()
blists = {}
for i in c.paths:
if booklists[i] is not None:
blists[i] = booklists[i]
try:
if booklists[i] is not None:
blists[i] = booklists[i]
except IndexError:
pass
opts = self.settings()
if opts.extra_customization:
collections = [x.strip() for x in

View File

@ -14,12 +14,13 @@ Device driver for the SONY T1 devices
import os, time, re
import sqlite3 as sqlite
from contextlib import closing
from datetime import date
from calibre.devices.usbms.driver import USBMS, debug_print
from calibre.devices.usbms.device import USBDevice
from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import BookList
from calibre.ebooks.metadata import authors_to_sort_string
from calibre.ebooks.metadata import authors_to_sort_string, authors_to_string
from calibre.constants import islinux
DBPATH = 'Sony_Reader/database/books.db'
@ -38,7 +39,8 @@ class PRST1(USBMS):
path_sep = '/'
booklist_class = CollectionsBookList
FORMATS = ['epub', 'pdf', 'txt']
FORMATS = ['epub', 'pdf', 'txt', 'book', 'zbf'] # The last two are
# used in Japan
CAN_SET_METADATA = ['collections']
CAN_DO_DEVICE_DB_PLUGBOARD = True
@ -83,18 +85,26 @@ class PRST1(USBMS):
'the same aspect ratio (width to height) as the cover. '
'Unset it if you want the thumbnail to be the maximum size, '
'ignoring aspect ratio.'),
_('Use SONY Author Format (First Author Only)') +
':::' +
_('Set this option if you want the author on the Sony to '
'appear the same way the T1 sets it. This means it will '
'only show the first author for books with multiple authors. '
'Leave this disabled if you use Metadata Plugboards.')
]
EXTRA_CUSTOMIZATION_DEFAULT = [
', '.join(['series', 'tags']),
True,
False,
True,
False,
]
OPT_COLLECTIONS = 0
OPT_UPLOAD_COVERS = 1
OPT_REFRESH_COVERS = 2
OPT_PRESERVE_ASPECT_RATIO = 3
OPT_USE_SONY_AUTHORS = 4
plugboards = None
plugboard_func = None
@ -104,6 +114,8 @@ class PRST1(USBMS):
# that we do not preserve aspect ratio
if not self.settings().extra_customization[self.OPT_PRESERVE_ASPECT_RATIO]:
self.THUMBNAIL_WIDTH = 108
# Make sure the date offset is set to None; we'll calculate it in books().
self.device_offset = None
def windows_filter_pnp_id(self, pnp_id):
return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id
@ -169,6 +181,27 @@ class PRST1(USBMS):
bl_collections.setdefault(row[0], [])
bl_collections[row[0]].append(row[1])
# collect information on offsets, but assume any
# offset we already calculated is correct
if self.device_offset is None:
query = 'SELECT file_path, modified_date FROM books'
cursor.execute(query)
time_offsets = {}
for i, row in enumerate(cursor):
comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000)
device_date = int(row[1])
offset = device_date - comp_date
time_offsets.setdefault(offset, 0)
time_offsets[offset] = time_offsets[offset] + 1
try:
device_offset = max(time_offsets, key=lambda a: time_offsets.get(a))
debug_print("Device Offset: %d ms"%device_offset)
self.device_offset = device_offset
except ValueError:
debug_print("No Books To Detect Device Offset.")
for idx, book in enumerate(bl):
query = 'SELECT _id, thumbnail FROM books WHERE file_path = ?'
t = (book.lpath,)
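The offset-detection code above treats the device clock offset as the difference, between a file's on-disk mtime and the device database's modified_date, that the largest number of books agree on. A standalone sketch of that majority vote (function name and the shape of rows are assumed for illustration):

import os
from collections import Counter

def estimate_device_offset(rows, prefix):
    # rows: iterable of (file_path, modified_date_ms) pairs from the device DB
    votes = Counter()
    for path, device_ms in rows:
        comp_ms = int(os.path.getmtime(prefix + path) * 1000)
        votes[device_ms - comp_ms] += 1
    if not votes:
        return None  # no books from which to detect an offset
    # The offset shared by the most books is taken as the device offset
    return votes.most_common(1)[0][0]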
@ -238,6 +271,7 @@ class PRST1(USBMS):
opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
use_sony_authors = opts.extra_customization[self.OPT_USE_SONY_AUTHORS]
cursor = connection.cursor()
@ -267,15 +301,21 @@ class PRST1(USBMS):
else:
author = authors_to_sort_string(newmi.authors)
else:
author = newmi.authors[0]
if use_sony_authors:
author = newmi.authors[0]
else:
author = authors_to_string(newmi.authors)
except:
author = _('Unknown')
title = newmi.title or _('Unknown')
# Get modified date
modified_date = os.path.getmtime(book.path)
time_offset = time.altzone if time.daylight else time.timezone
modified_date = (modified_date - time_offset) * 1000
modified_date = os.path.getmtime(book.path) * 1000
if self.device_offset is not None:
modified_date = modified_date + self.device_offset
else:
time_offset = -time.altzone if time.daylight else -time.timezone
modified_date = modified_date + (time_offset * 1000)
if lpath not in db_books:
query = '''
@ -306,6 +346,9 @@ class PRST1(USBMS):
self.upload_book_cover(connection, book, source_id)
db_books[lpath] = None
if self.is_sony_periodical(book):
self.periodicalize_book(connection, book)
for book, bookId in db_books.items():
if bookId is not None:
# Remove From Collections
@ -479,3 +522,52 @@ class PRST1(USBMS):
connection.commit()
cursor.close()
def is_sony_periodical(self, book):
if _('News') not in book.tags:
return False
if not book.lpath.lower().endswith('.epub'):
return False
if book.pubdate.date() < date(2010, 10, 17):
return False
return True
def periodicalize_book(self, connection, book):
if not self.is_sony_periodical(book):
return
name = None
if '[' in book.title:
name = book.title.split('[')[0].strip()
if len(name) < 4:
name = None
if not name:
try:
name = [t for t in book.tags if t != _('News')][0]
except:
name = None
if not name:
name = book.title
pubdate = None
try:
pubdate = int(time.mktime(book.pubdate.timetuple()) * 1000)
except:
pass
cursor = connection.cursor()
query = '''
UPDATE books
SET conforms_to = 'http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0',
periodical_name = ?,
description = ?,
publication_date = ?
WHERE _id = ?
'''
t = (name, None, pubdate, book.bookId,)
cursor.execute(query, t)
connection.commit()
cursor.close()

View File

@ -1068,6 +1068,12 @@ class Device(DeviceConfig, DevicePlugin):
'''
return {}
def add_annotation_to_library(self, db, db_id, annotation):
'''
Add an annotation to the calibre library
'''
pass
def create_upload_path(self, path, mdata, fname, create_dirs=True):
path = os.path.abspath(path)
maxlen = self.MAX_PATH_LEN
@ -1147,3 +1153,6 @@ class Device(DeviceConfig, DevicePlugin):
os.makedirs(filedir)
return filepath
def create_annotations_path(self, mdata, device_path=None):
return self.create_upload_path(os.path.abspath('/<storage>'), mdata, 'x.bookmark', create_dirs=False)

View File

@ -30,7 +30,7 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'ht
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4']
'xps', 'oxps', 'azw4', 'book', 'zbf']
class HTMLRenderer(object):

View File

@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log, self.opts)
rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir, debug_dump=debug_dump)
self._chm_reader = rdr

View File

@ -40,14 +40,14 @@ class CHMError(Exception):
pass
class CHMReader(CHMFile):
def __init__(self, input, log, opts):
def __init__(self, input, log, input_encoding=None):
CHMFile.__init__(self)
if isinstance(input, unicode):
input = input.encode(filesystem_encoding)
if not self.LoadCHM(input):
raise CHMError("Unable to open CHM file '%s'"%(input,))
self.log = log
self.opts = opts
self.input_encoding = input_encoding
self._sourcechm = input
self._contents = None
self._playorder = 0
@ -156,8 +156,8 @@ class CHMReader(CHMFile):
break
def _reformat(self, data, htmlpath):
if self.opts.input_encoding:
data = data.decode(self.opts.input_encoding)
if self.input_encoding:
data = data.decode(self.input_encoding)
try:
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
soup = BeautifulSoup(data)

View File

@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
def unarchive(self, path, tdir):
extract(path, tdir)
files = list(walk(tdir))
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
for f in files]
from calibre.customize.ui import available_input_formats
fmts = available_input_formats()
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)

View File

@ -30,9 +30,11 @@ class Worker(Thread): # Get details {{{
Get book details from amazons book page in a separate thread
'''
def __init__(self, url, result_queue, browser, log, relevance, domain, plugin, timeout=20):
def __init__(self, url, result_queue, browser, log, relevance, domain,
plugin, timeout=20, testing=False):
Thread.__init__(self)
self.daemon = True
self.testing = testing
self.url, self.result_queue = url, result_queue
self.log, self.timeout = log, timeout
self.relevance, self.plugin = relevance, plugin
@ -189,10 +191,9 @@ class Worker(Thread): # Get details {{{
self.log.exception(msg)
return
oraw = raw
raw = xml_to_unicode(raw, strip_encoding_pats=True,
resolve_entities=True)[0]
#open('/t/t.html', 'wb').write(raw)
if '<title>404 - ' in raw:
self.log.error('URL malformed: %r'%self.url)
return
@ -211,14 +212,20 @@ class Worker(Thread): # Get details {{{
self.log.error(msg)
return
self.parse_details(root)
self.parse_details(oraw, root)
def parse_details(self, root):
def parse_details(self, raw, root):
try:
asin = self.parse_asin(root)
except:
self.log.exception('Error parsing asin for url: %r'%self.url)
asin = None
if self.testing:
import tempfile
with tempfile.NamedTemporaryFile(prefix=asin + '_',
suffix='.html', delete=False) as f:
f.write(raw)
print ('Downloaded html for', asin, 'saved in', f.name)
try:
title = self.parse_title(root)
@ -310,7 +317,7 @@ class Worker(Thread): # Get details {{{
return l.get('href').rpartition('/')[-1]
def parse_title(self, root):
tdiv = root.xpath('//h1[@class="parseasinTitle"]')[0]
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
if actual_title:
title = tostring(actual_title[0], encoding=unicode,
@ -320,11 +327,11 @@ class Worker(Thread): # Get details {{{
return re.sub(r'[(\[].*[)\]]', '', title).strip()
def parse_authors(self, root):
x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
x = '//h1[contains(@class, "parseasinTitle")]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
aname = root.xpath(x)
if not aname:
aname = root.xpath('''
//h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
//h1[contains(@class, "parseasinTitle")]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
''')
for x in aname:
x.tail = ''
@ -666,7 +673,8 @@ class Amazon(Source):
log.error('No matches found with query: %r'%query)
return
workers = [Worker(url, result_queue, br, log, i, domain, self) for i, url in
workers = [Worker(url, result_queue, br, log, i, domain, self,
testing=getattr(self, 'running_a_test', False)) for i, url in
enumerate(matches)]
for w in workers:
@ -740,16 +748,6 @@ if __name__ == '__main__': # tests {{{
),
( # An e-book ISBN not on Amazon, the title/author search matches
# the Kindle edition, which has different markup for ratings and
# isbn
{'identifiers':{'isbn': '9780307459671'},
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
),
( # This isbn not on amazon
{'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
'authors':['Lutz']},
@ -783,7 +781,7 @@ if __name__ == '__main__': # tests {{{
de_tests = [ # {{{
(
{'identifiers':{'isbn': '3548283519'}},
[title_test('Wer Wind sät',
[title_test('Wer Wind Sät: Der Fünfte Fall Für Bodenstein Und Kirchhoff',
exact=True), authors_test(['Nele Neuhaus'])
]
@ -835,6 +833,6 @@ if __name__ == '__main__': # tests {{{
] # }}}
test_identify_plugin(Amazon.name, com_tests)
#test_identify_plugin(Amazon.name, es_tests)
#test_identify_plugin(Amazon.name, de_tests)
# }}}

View File

@ -196,6 +196,7 @@ class Source(Plugin):
def __init__(self, *args, **kwargs):
Plugin.__init__(self, *args, **kwargs)
self.running_a_test = False # Set to True when using identify_test()
self._isbn_to_identifier_cache = {}
self._identifier_to_cover_url_cache = {}
self.cache_lock = threading.RLock()
@ -284,14 +285,15 @@ class Source(Plugin):
if authors:
# Leave ' in there for Irish names
remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
replace_pat = re.compile(r'[-+.:;]')
remove_pat = re.compile(r'[!@#$%^&*(){}`~"\s\[\]/]')
replace_pat = re.compile(r'[-+.:;,]')
if only_first_author:
authors = authors[:1]
for au in authors:
has_comma = ',' in au
au = replace_pat.sub(' ', au)
parts = au.split()
if ',' in au:
if has_comma:
# au probably in ln, fn form
parts = parts[1:] + parts[:1]
for tok in parts:
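The hunk above makes author tokenization remember whether the name was in "Lastname, Firstname" form before the comma is replaced with a space, so the parts can be reordered afterwards. A simplified sketch of just that reordering (the full method also strips additional punctuation):

import re

def author_tokens(au):
    # Note the comma before punctuation is replaced by spaces
    has_comma = ',' in au
    au = re.sub(r'[-+.:;,]', ' ', au)
    parts = au.split()
    if has_comma:
        # "Last, First" -> "First Last"
        parts = parts[1:] + parts[:1]
    return [tok.lower() for tok in parts]

# author_tokens('Neuhaus, Nele') -> ['nele', 'neuhaus']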

View File

@ -183,7 +183,11 @@ def test_identify_plugin(name, tests): # {{{
rq = Queue()
args = (log, rq, abort)
start_time = time.time()
err = plugin.identify(*args, **kwargs)
plugin.running_a_test = True
try:
err = plugin.identify(*args, **kwargs)
finally:
plugin.running_a_test = False
total_time = time.time() - start_time
times.append(total_time)
if err is not None:

View File

@ -66,12 +66,15 @@ class EXTHHeader(object):
# last update time
pass
elif id == 503: # Long title
if not title or title == _('Unknown') or \
'USER_CONTENT' in title or title.startswith('dtp_'):
try:
title = content.decode(codec)
except:
pass
# Amazon seems to regard this as the definitive book title
# rather than the title from the PDB header. In fact, when
# sending MOBI files through Amazon's email service, if the
# title contains non-ASCII or non-filename-safe characters,
# they are messed up in the PDB header
try:
title = content.decode(codec)
except:
pass
#else:
# print 'unknown record', id, repr(content)
if title:

View File

@ -212,7 +212,11 @@ class Serializer(object):
if tocref.klass == "periodical":
buf.write('<div> <div height="1em"></div>')
else:
buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>'+tocref.title+'</b></font></h2> <div height="1em"></div>')
t = tocref.title
if isinstance(t, unicode):
t = t.encode('utf-8')
buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>'
+t+'</b></font></h2> <div height="1em"></div>')
buf.write('<ul>')
@ -221,14 +225,17 @@ class Serializer(object):
itemhref = tocitem.href
if tocref.klass == 'periodical':
# This is a section node.
# For periodical toca, the section urls are like r'feed_\d+/index.html'
# For periodical tocs, the section urls are like r'feed_\d+/index.html'
# We don't want to point to the start of the first article
# so we change the href.
itemhref = re.sub(r'article_\d+/', '', itemhref)
self.href_offsets[itemhref].append(buf.tell())
buf.write('0000000000')
buf.write(' ><font size="+1" color="blue"><b><u>')
buf.write(tocitem.title)
t = tocitem.title
if isinstance(t, unicode):
t = t.encode('utf-8')
buf.write(t)
buf.write('</u></b></font></a></li>')
buf.write('</ul><div height="1em"></div></div><mbp:pagebreak />')
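Both hunks in this file apply the same fix: the serializer writes into a byte buffer, so unicode section and article titles must be UTF-8 encoded first. The pattern, factored into a hypothetical helper:

def write_title(buf, title):
    # The MOBI serializer buffer holds bytes, not unicode
    if isinstance(title, unicode):
        title = title.encode('utf-8')
    buf.write(title)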

View File

@ -20,6 +20,7 @@ from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre import guess_type, prints, prepare_string_for_xml
from calibre.ebooks.oeb.transforms.cover import CoverManager
from calibre.constants import filesystem_encoding
TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace(\
'__ar__', 'none').replace('__viewbox__', '0 0 600 800'
@ -180,6 +181,8 @@ class EbookIterator(object):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
if not isinstance(self.base, unicode):
self.base = self.base.decode(filesystem_encoding)
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
plumber = Plumber(self.pathtoebook, self.base, self.log)
plumber.setup_options()

View File

@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin):
html = 'index.xhtml'
with open(html, 'wb') as f:
res = transform.tostring(result)
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
#clean multiple \n
res = re.sub('\n+', '\n', res)
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
res = re.sub('\s*<body>', '<body>', res)
res = re.sub('(?<=\n)\n{2}',
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
# res = re.sub('\s*<body>', '<body>', res)
# res = re.sub('(?<=\n)\n{2}',
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
f.write(res)
self.write_inline_css(inline_class, border_styles)
stream.seek(0)

View File

@ -376,13 +376,13 @@ class ParseRtf:
msg += 'self.__run_level is "%s"\n' % self.__run_level
raise RtfInvalidCodeException, msg
if self.__run_level > 1:
sys.stderr.write(_('File could be older RTF...\n'))
sys.stderr.write('File could be older RTF...\n')
if found_destination:
if self.__run_level > 1:
sys.stderr.write(_(
sys.stderr.write(
'File also has newer RTF.\n'
'Will do the best to convert.\n'
))
)
add_brackets_obj = add_brackets.AddBrackets(
in_file = self.__temp_file,
bug_handler = RtfInvalidCodeException,

View File

@ -11,11 +11,11 @@
# #
# #
#########################################################################
import sys, os, tempfile
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy, check_brackets
# note to self. This is the first module in which I use tempfile. A good idea?
"""
"""
class AddBrackets:
"""
Add brackets for old RTF.
@ -41,6 +41,7 @@ class AddBrackets:
self.__copy = copy
self.__write_to = tempfile.mktemp()
self.__run_level = run_level
def __initiate_values(self):
"""
"""
@ -82,14 +83,16 @@ class AddBrackets:
'cw<ci<subscript_' ,
'cw<ci<superscrip',
'cw<ci<underlined' ,
'cw<ul<underlined' ,
# 'cw<ul<underlined' ,
]
def __before_body_func(self, line):
"""
"""
if self.__token_info == 'mi<mk<body-open_':
self.__state = 'in_body'
self.__write_obj.write(line)
def __in_body_func(self, line):
"""
"""
@ -108,6 +111,7 @@ class AddBrackets:
self.__state = 'after_control_word'
else:
self.__write_obj.write(line)
def __after_control_word_func(self, line):
"""
"""
@ -122,6 +126,7 @@ class AddBrackets:
self.__ignore_count = self.__ob_count
else:
self.__state = 'in_body'
def __write_group(self):
"""
"""
@ -141,6 +146,7 @@ class AddBrackets:
self.__write_obj.write(inline_string)
self.__open_bracket = 1
self.__temp_group = []
def __change_permanent_group(self):
"""
use temp group to change permanent group
@ -150,6 +156,7 @@ class AddBrackets:
if token_info in self.__accept:
att = line[20:-1]
self.__inline[token_info] = att
def __ignore_func(self, line):
"""
Don't add any brackets while inside of brackets RTF has already
@ -159,12 +166,14 @@ class AddBrackets:
if self.__token_info == 'cb<nu<clos-brack'and\
self.__cb_count == self.__ignore_count:
self.__state = 'in_body'
def __check_brackets(self, in_file):
self.__check_brack_obj = check_brackets.CheckBrackets\
(file = in_file)
good_br = self.__check_brack_obj.check_brackets()[0]
if not good_br:
return 1
def add_brackets(self):
"""
"""

View File

@ -5,14 +5,57 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, datetime
from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
from calibre.gui2 import error_dialog
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
from calibre import strftime
from calibre.gui2.actions import InterfaceAction
from calibre.devices.usbms.device import Device
from calibre.gui2.dialogs.progress import ProgressDialog
class Updater(QThread): # {{{
update_progress = pyqtSignal(int)
update_done = pyqtSignal()
def __init__(self, parent, db, device, annotation_map, done_callback):
QThread.__init__(self, parent)
self.errors = {}
self.db = db
self.keep_going = True
self.pd = ProgressDialog(_('Merging user annotations into database'), '',
0, len(annotation_map), parent=parent)
self.device = device
self.annotation_map = annotation_map
self.done_callback = done_callback
self.pd.canceled_signal.connect(self.canceled)
self.pd.setModal(True)
self.pd.show()
self.update_progress.connect(self.pd.set_value,
type=Qt.QueuedConnection)
self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)
def canceled(self):
self.keep_going = False
self.pd.hide()
def run(self):
for i, id_ in enumerate(self.annotation_map):
if not self.keep_going:
break
bm = Device.UserAnnotation(self.annotation_map[id_][0],
self.annotation_map[id_][1])
try:
self.device.add_annotation_to_library(self.db, id_, bm)
except:
import traceback
self.errors[id_] = traceback.format_exc()
self.update_progress.emit(i)
self.update_done.emit()
self.done_callback(self.annotation_map.keys(), self.errors)
# }}}
class FetchAnnotationsAction(InterfaceAction):
@ -41,13 +84,21 @@ class FetchAnnotationsAction(InterfaceAction):
fmts.append(format.lower())
return fmts
def get_device_path_from_id(id_):
paths = []
for x in ('memory', 'card_a', 'card_b'):
x = getattr(self.gui, x+'_view').model()
paths += x.paths_for_db_ids(set([id_]), as_map=True)[id_]
return paths[0].path if paths else None
def generate_annotation_paths(ids, db, device):
# Generate path templates
# Individual storage mount points scanned/resolved in driver.get_annotations()
path_map = {}
for id in ids:
path = get_device_path_from_id(id)
mi = db.get_metadata(id, index_is_id=True)
a_path = device.create_upload_path(os.path.abspath('/<storage>'), mi, 'x.bookmark', create_dirs=False)
a_path = device.create_annotations_path(mi, device_path=path)
path_map[id] = dict(path=a_path, fmts=get_formats(id))
return path_map
@ -78,166 +129,6 @@ class FetchAnnotationsAction(InterfaceAction):
path_map)
def annotations_fetched(self, job):
from calibre.devices.usbms.device import Device
from calibre.ebooks.metadata import MetaInformation
from calibre.gui2.dialogs.progress import ProgressDialog
from calibre.library.cli import do_add_format
class Updater(QThread): # {{{
update_progress = pyqtSignal(int)
update_done = pyqtSignal()
FINISHED_READING_PCT_THRESHOLD = 96
def __init__(self, parent, db, annotation_map, done_callback):
QThread.__init__(self, parent)
self.db = db
self.pd = ProgressDialog(_('Merging user annotations into database'), '',
0, len(job.result), parent=parent)
self.am = annotation_map
self.done_callback = done_callback
self.pd.canceled_signal.connect(self.canceled)
self.pd.setModal(True)
self.pd.show()
self.update_progress.connect(self.pd.set_value,
type=Qt.QueuedConnection)
self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)
def generate_annotation_html(self, bookmark):
# Returns <div class="user_annotations"> ... </div>
last_read_location = bookmark.last_read_location
timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
percent_read = bookmark.percent_read
ka_soup = BeautifulSoup()
dtc = 0
divTag = Tag(ka_soup,'div')
divTag['class'] = 'user_annotations'
# Add the last-read location
spanTag = Tag(ka_soup, 'span')
spanTag['style'] = 'font-weight:bold'
if bookmark.book_format == 'pdf':
spanTag.insert(0,NavigableString(
_("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)") % \
dict(time=strftime(u'%x', timestamp.timetuple()),
loc=last_read_location,
pr=percent_read)))
else:
spanTag.insert(0,NavigableString(
_("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)") % \
dict(time=strftime(u'%x', timestamp.timetuple()),
loc=last_read_location,
pr=percent_read)))
divTag.insert(dtc, spanTag)
dtc += 1
divTag.insert(dtc, Tag(ka_soup,'br'))
dtc += 1
if bookmark.user_notes:
user_notes = bookmark.user_notes
annotations = []
# Add the annotations sorted by location
# Italicize highlighted text
for location in sorted(user_notes):
if user_notes[location]['text']:
annotations.append(
_('<b>Location %(dl)d &bull; %(typ)s</b><br />%(text)s<br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type'],
text=(user_notes[location]['text'] if \
user_notes[location]['type'] == 'Note' else \
'<i>%s</i>' % user_notes[location]['text'])))
else:
if bookmark.book_format == 'pdf':
annotations.append(
_('<b>Page %(dl)d &bull; %(typ)s</b><br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type']))
else:
annotations.append(
_('<b>Location %(dl)d &bull; %(typ)s</b><br />') % \
dict(dl=user_notes[location]['displayed_location'],
typ=user_notes[location]['type']))
for annotation in annotations:
divTag.insert(dtc, annotation)
dtc += 1
ka_soup.insert(0,divTag)
return ka_soup
'''
def mark_book_as_read(self,id):
read_tag = gprefs.get('catalog_epub_mobi_read_tag')
if read_tag:
self.db.set_tags(id, [read_tag], append=True)
'''
def canceled(self):
self.pd.hide()
def run(self):
ignore_tags = set(['Catalog','Clippings'])
for (i, id) in enumerate(self.am):
bm = Device.UserAnnotation(self.am[id][0],self.am[id][1])
if bm.type == 'kindle_bookmark':
mi = self.db.get_metadata(id, index_is_id=True)
user_notes_soup = self.generate_annotation_html(bm.value)
if mi.comments:
a_offset = mi.comments.find('<div class="user_annotations">')
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
if a_offset >= 0:
mi.comments = mi.comments[:a_offset]
if ad_offset >= 0:
mi.comments = mi.comments[:ad_offset]
if set(mi.tags).intersection(ignore_tags):
continue
if mi.comments:
hrTag = Tag(user_notes_soup,'hr')
hrTag['class'] = 'annotations_divider'
user_notes_soup.insert(0,hrTag)
mi.comments += user_notes_soup.prettify()
else:
mi.comments = unicode(user_notes_soup.prettify())
# Update library comments
self.db.set_comment(id, mi.comments)
'''
# Update 'read' tag except for Catalogs/Clippings
if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD:
if not set(mi.tags).intersection(ignore_tags):
self.mark_book_as_read(id)
'''
# Add bookmark file to id
self.db.add_format_with_hooks(id, bm.value.bookmark_extension,
bm.value.path, index_is_id=True)
self.update_progress.emit(i)
elif bm.type == 'kindle_clippings':
# Find 'My Clippings' author=Kindle in database, or add
last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
mc_id = list(db.data.parse('title:"My Clippings"'))
if mc_id:
do_add_format(self.db, mc_id[0], 'TXT', bm.value['path'])
mi = self.db.get_metadata(mc_id[0], index_is_id=True)
mi.comments = last_update
self.db.set_metadata(mc_id[0], mi)
else:
mi = MetaInformation('My Clippings', authors = ['Kindle'])
mi.tags = ['Clippings']
mi.comments = last_update
self.db.add_books([bm.value['path']], ['txt'], [mi])
self.update_done.emit()
self.done_callback(self.am.keys())
# }}}
if not job.result: return
@ -246,9 +137,25 @@ class FetchAnnotationsAction(InterfaceAction):
_('User annotations generated from main library only'),
show=True)
db = self.gui.library_view.model().db
device = self.gui.device_manager.device
self.__annotation_updater = Updater(self.gui, db, job.result,
self.Dispatcher(self.gui.library_view.model().refresh_ids))
self.__annotation_updater = Updater(self.gui, db, device, job.result,
self.Dispatcher(self.annotations_updated))
self.__annotation_updater.start()
def annotations_updated(self, ids, errors):
self.gui.library_view.model().refresh_ids(ids)
if errors:
db = self.gui.library_view.model().db
entries = []
for id_, tb in errors.iteritems():
title = id_
if isinstance(id_, int):
title = db.title(id_, index_is_id=True)
entries.extend([title, tb, ''])
error_dialog(self.gui, _('Some errors'),
_('Could not fetch annotations for some books. Click '
'show details to see which ones.'),
det_msg='\n'.join(entries), show=True)

View File

@ -45,7 +45,7 @@ class TemplateHighlighter(QSyntaxHighlighter):
"keyword"))
TemplateHighlighter.Rules.append((QRegExp(
"|".join([r"\b%s\b" % builtin for builtin in
formatter_functions.get_builtins()])),
formatter_functions().get_builtins()])),
"builtin"))
TemplateHighlighter.Rules.append((QRegExp(
@ -248,8 +248,8 @@ class TemplateDialog(QDialog, Ui_TemplateDialog):
except:
self.builtin_source_dict = {}
self.funcs = formatter_functions.get_functions()
self.builtins = formatter_functions.get_builtins()
self.funcs = formatter_functions().get_functions()
self.builtins = formatter_functions().get_builtins()
func_names = sorted(self.funcs)
self.function.clear()

View File

@ -1239,11 +1239,14 @@ class DeviceBooksModel(BooksModel): # {{{
def paths(self, rows):
return [self.db[self.map[r.row()]].path for r in rows ]
def paths_for_db_ids(self, db_ids):
res = []
def paths_for_db_ids(self, db_ids, as_map=False):
res = defaultdict(list) if as_map else []
for r,b in enumerate(self.db):
if b.application_id in db_ids:
res.append((r,b))
if as_map:
res[b.application_id].append(b)
else:
res.append((r,b))
return res
def get_collections_with_ids(self):

View File

@ -82,8 +82,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
traceback.print_exc()
self.builtin_source_dict = {}
self.funcs = formatter_functions.get_functions()
self.builtins = formatter_functions.get_builtins_and_aliases()
self.funcs = formatter_functions().get_functions()
self.builtins = formatter_functions().get_builtins_and_aliases()
self.build_function_names_box()
self.function_name.currentIndexChanged[str].connect(self.function_index_changed)
@ -217,13 +217,13 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
pass
def commit(self):
formatter_functions.reset_to_builtins()
formatter_functions().reset_to_builtins()
pref_value = []
for f in self.funcs:
if f in self.builtins:
continue
func = self.funcs[f]
formatter_functions.register_function(func)
formatter_functions().register_function(func)
pref_value.append((func.name, func.doc, func.arg_count, func.program_text))
self.db.prefs.set('user_template_functions', pref_value)
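Throughout this commit, formatter_functions.get_builtins() becomes formatter_functions().get_builtins(): the module-level registry object is replaced by a factory call. A minimal sketch of the likely pattern, with all names assumed (a lazily constructed, shared registry rather than one built at import time):

class FormatterFunctions(object):
    def __init__(self):
        self._functions = {}

    def get_builtins(self):
        return self._functions

_instance = None

def formatter_functions():
    # Construct the shared registry on first use instead of at import time
    global _instance
    if _instance is None:
        _instance = FormatterFunctions()
    return _instance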

View File

@ -214,7 +214,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
fmvals = [f for f in default_prefs['field_metadata'].values() if f['is_custom']]
for f in fmvals:
self.create_custom_column(f['label'], f['name'], f['datatype'],
f['is_multiple'] is not None, f['is_editable'], f['display'])
f['is_multiple'] is not None and len(f['is_multiple']) > 0,
f['is_editable'], f['display'])
self.initialize_dynamic()
def get_property(self, idx, index_is_id=False, loc=-1):
@ -302,7 +303,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if cats_changed:
self.prefs.set('user_categories', user_cats)
load_user_template_functions(self.prefs.get('user_template_functions', []))
if not self.is_second_db:
load_user_template_functions(self.prefs.get('user_template_functions', []))
self.conn.executescript('''
DROP TRIGGER IF EXISTS author_insert_trg;
@ -2103,7 +2105,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
user_mi = mi.get_all_user_metadata(make_copy=False)
for key in user_mi.iterkeys():
if key in self.field_metadata and \
user_mi[key]['datatype'] == self.field_metadata[key]['datatype']:
user_mi[key]['datatype'] == self.field_metadata[key]['datatype'] and \
(user_mi[key]['datatype'] != 'text' or
user_mi[key]['is_multiple'] == self.field_metadata[key]['is_multiple']):
val = mi.get(key, None)
if force_changes or val is not None:
doit(self.set_custom, id, val=val, extra=mi.get_extra(key),

View File

@ -150,21 +150,12 @@ class Formatter(TemplateFormatter):
traceback.print_exc()
b = None
if b is not None and b['datatype'] == 'composite':
val = b.get('#value#', None)
if val is not None:
return val.replace('/', '_').replace('\\', '_')
if key in self.composite_values:
self.composite_values[key] = val
return self.composite_values[key]
try:
# We really should not get here, but it is safer to try
self.composite_values[key] = 'RECURSIVE_COMPOSITE FIELD (S2D) ' + key
self.composite_values[key] = \
self.vformat(b['display']['composite_template'],
[], kwargs).replace('/', '_').replace('\\', '_')
return self.composite_values[key]
except Exception, e:
return unicode(e)
self.composite_values[key] = 'RECURSIVE_COMPOSITE FIELD (S2D) ' + key
self.composite_values[key] = \
self.vformat(b['display']['composite_template'], [], kwargs)
return self.composite_values[key]
if key in kwargs:
val = kwargs[key]
if isinstance(val, list):
@ -179,13 +170,6 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
sanitize_func=ascii_filename, replace_whitespace=False,
to_lowercase=False, safe_format=True):
# Note: the mi argument is assumed to be an instance of Metadata returned
# by db.get_metadata(). Reason: the composite columns should have already
# been evaluated, which get_metadata does. If the mi is something else and
# if the template uses composite columns, then a best-efforts attempt is
# made to evaluate them. This will fail if the template uses a user-defined
# template function.
tsorder = tweaks['save_template_title_series_sorting']
format_args = FORMAT_ARGS.copy()
format_args.update(mi.all_non_none_fields())
@ -374,6 +358,8 @@ def do_save_book_to_disk(id_, mi, cover, plugboards,
newmi.template_to_attribute(mi, cpb)
else:
newmi = mi
if cover:
newmi.cover_data = ('jpg', cover)
set_metadata(stream, newmi, fmt)
except:
if DEBUG:

View File

@ -242,6 +242,10 @@ Replace ``192.168.1.2`` with the local IP address of the computer running |app|.
If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout.
.. note::
As of iOS version 5, Stanza no longer works on Apple devices. Alternatives to Stanza are discussed `here <http://www.mobileread.com/forums/showthread.php?t=152789>`_.
Using iBooks
**************
@ -251,7 +255,7 @@ Start the Safari browser and type in the IP address and port of the computer run
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
You wills ee a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
You will see a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
With the USB cable + iTunes

View File

@ -65,7 +65,7 @@ def generate_template_language_help():
funcs = defaultdict(dict)
for func in formatter_functions.get_builtins().values():
for func in formatter_functions().get_builtins().values():
class_name = func.__class__.__name__
func_sig = getattr(func, 'doc')
x = func_sig.find(' -- ')

Some files were not shown because too many files have changed in this diff