mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
9548696089
71
recipes/20minutes.recipe
Normal file
71
recipes/20minutes.recipe
Normal file
@ -0,0 +1,71 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
'''
|
||||
20minutes.fr
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Minutes(BasicNewsRecipe):
    '''Recipe that builds a periodical from the 20minutes.fr RSS feeds.'''

    # -- Publication metadata --
    title = '20 minutes'
    __author__ = 'calibre'
    description = 'Actualités'
    publisher = '20minutes.fr'
    category = 'Actualités, France, Monde'
    language = 'fr'
    encoding = 'cp1252'

    # -- Download and formatting options --
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # Replacement styling, applied since the site's own stylesheets are dropped.
    extra_css = '''
                   h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                   .mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                   .mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                   .mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    # -- Article clean-up rules --
    # Only the element with this id is kept from each page.
    keep_only_tags = [dict(id='mn-article')]

    remove_tags_after = dict(
        name='div',
        attrs={'class':['mna-body','mna-signature']},
    )

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['mn-section-heading']}),
        dict(name='a', attrs={'href':['#commentaires']}),
        dict(name='div', attrs={'class':['mn-right']}),
        dict(name='div', attrs={'class':['mna-box']}),
        dict(name='div', attrs={'class':['mna-comment-call']}),
        dict(name='div', attrs={'class':['mna-tools']}),
        dict(name='div', attrs={'class':['mn-trilist']}),
    ]

    # -- Sections: (title, RSS URL) --
    feeds = [
        ('France', 'http://www.20minutes.fr/rss/actu-france.xml'),
        ('International', 'http://www.20minutes.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.20minutes.fr/rss/hightech.xml'),
        ('Sciences', 'http://www.20minutes.fr/rss/sciences.xml'),
        ('Economie', 'http://www.20minutes.fr/rss/economie.xml'),
        ('Politique', 'http://www.20minutes.fr/rss/politique.xml'),
        (u'Médias', 'http://www.20minutes.fr/rss/media.xml'),
        ('Cinema', 'http://www.20minutes.fr/rss/cinema.xml'),
        ('People', 'http://www.20minutes.fr/rss/people.xml'),
        ('Culture', 'http://www.20minutes.fr/rss/culture.xml'),
        ('Sport', 'http://www.20minutes.fr/rss/sport.xml'),
        ('Paris', 'http://www.20minutes.fr/rss/paris.xml'),
        ('Lyon', 'http://www.20minutes.fr/rss/lyon.xml'),
        ('Toulouse', 'http://www.20minutes.fr/rss/toulouse.xml'),
    ]

    def preprocess_html(self, soup):
        '''Remove the inline ``style`` attribute from every tag that has one.

        Returns the same soup object after modification.
        '''
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup
|
@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe):
|
||||
#match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
|
||||
max_articles_per_feed = 25
|
||||
|
||||
extra_css = '''
|
||||
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
|
||||
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
|
||||
@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe):
|
||||
remove_tags = [
|
||||
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
||||
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
||||
'cnn_strycntntrgt', 'hed_side', 'foot']},
|
||||
'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']},
|
||||
{'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none',
|
||||
'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']},
|
||||
{'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev',
|
||||
'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']},
|
||||
{'style':['display:none']},
|
||||
dict(id=['ie_column']),
|
||||
]
|
||||
|
||||
@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe):
|
||||
ans = BasicNewsRecipe.get_article_url(self, article)
|
||||
return ans.partition('?')[0]
|
||||
|
||||
def get_masthead_url(self):
    '''
    Return the masthead image URL, or None when it cannot be opened.

    The image is fetched once as an availability check. On any download
    error a message is logged and None is returned.
    '''
    masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
    # Go through self: this works whether get_browser is defined as a
    # classmethod or as an instance method, and honours subclass overrides.
    br = self.get_browser()
    try:
        br.open(masthead)
    except Exception:
        # Best effort: Exception (not a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        self.log("\nCover unavailable")
        masthead = None
    return masthead
|
||||
|
58
recipes/ekathemerini.recipe
Normal file
58
recipes/ekathemerini.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
|
||||
|
||||
class Ekathimerini(BasicNewsRecipe):
    '''Recipe for ekathimerini (English edition), built from a single XML index.

    parse_index downloads ``rss_url`` and groups its items into one feed
    per distinct ``<subcat>`` value.
    '''

    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    encoding = 'windows-1253'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'

    max_articles_per_feed = 100
    oldest_article = 100
    delay = 1
    no_stylesheets = True
    conversion_options = { 'linearize_tables': True}
    keep_only_tags = [dict(name='td', attrs={'class':'news'})]

    # Index document read by parse_index.
    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        '''Yield one article dict per ``<item>`` of idx belonging to category.

        An item's category is the text of its ``<subcat>`` element, or u''
        when it has none. Each yielded dict has the keys 'title', 'url',
        'description' and 'date'.
        '''
        for item in idx.findAll('item'):
            subcat = item.find('subcat')
            item_category = self.tag_to_string(subcat) if subcat else u''
            if item_category != category:
                continue

            # The description text is parsed again as HTML and reduced to
            # plain text.
            raw_description = self.tag_to_string(item.find('description'))
            plain_description = self.tag_to_string(BeautifulSoup(raw_description))

            yield {
                    'title': self.tag_to_string(item.find('title')),
                    'url': self.tag_to_string(item.find('link')),
                    'description': plain_description,
                    'date' : self.tag_to_string(item.find('pubdate')),
                  }

    def parse_index(self):
        '''Download the index and return a list of (feed title, articles) pairs.

        The first feed, u'News', holds the items with no subcategory. It is
        followed by one feed per distinct subcategory in sorted order, each
        titled with the capitalized subcategory name.
        '''
        raw_index = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(raw_index, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        categories = sorted(set(self.tag_to_string(tag) for tag in idx.findAll('subcat')))

        feeds = [(u'News',list(self.find_articles(idx, u'')))]
        feeds.extend(
            (name.capitalize(), list(self.find_articles(idx, name)))
            for name in categories
        )
        return feeds

    def print_version(self, url):
        '''Return url with the '/4dcgi/' prefix rewritten to '/4Dcgi/4dcgi/'.'''
        old_prefix = 'http://www.ekathimerini.com/4dcgi/'
        new_prefix = 'http://www.ekathimerini.com/4Dcgi/4dcgi/'
        return url.replace(old_prefix, new_prefix)
|
||||
|
@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})]
|
||||
|
||||
extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '
|
||||
|
||||
|
8
recipes/frandroid.recipe
Normal file
8
recipes/frandroid.recipe
Normal file
@ -0,0 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
class BasicUserRecipe1318572550(AutomaticNewsRecipe):
    '''Automatic-cleanup recipe for the FrAndroid feed.'''

    title = u'FrAndroid'

    # Limits on what is fetched from the feed.
    oldest_article = 2
    max_articles_per_feed = 100

    auto_cleanup = True

    # (section title, feed URL)
    feeds = [
        (u'FrAndroid', u'http://feeds.feedburner.com/Frandroid'),
    ]
|
8
recipes/googlemobileblog.recipe
Normal file
8
recipes/googlemobileblog.recipe
Normal file
@ -0,0 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
class BasicUserRecipe1318572445(AutomaticNewsRecipe):
    '''Automatic-cleanup recipe for the Google Mobile Blog feed.'''

    title = u'Google Mobile Blog'

    # Limits on what is fetched from the feed.
    oldest_article = 7
    max_articles_per_feed = 100

    auto_cleanup = True

    # (section title, feed URL)
    feeds = [
        (u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml'),
    ]
|
50
recipes/hankyoreh.recipe
Normal file
50
recipes/hankyoreh.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download The Hankyoreh
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
|
||||
class Hankyoreh(BasicNewsRecipe):
    '''Recipe that downloads The Hankyoreh news articles from its RSS feeds.'''

    # -- Publication metadata --
    title = u'Hankyoreh'
    __author__ = 'Seongkyoun Yoo'
    description = u'The Hankyoreh News articles'
    language = 'ko'

    # -- Download options --
    oldest_article = 5
    max_articles_per_feed = 5
    recursions = 1
    no_stylesheets = True

    # -- Article clean-up rules --
    keep_only_tags = [
        dict(name='tr', attrs={'height':['60px']}),
        dict(id=['fontSzArea']),
    ]

    remove_tags = [
        dict(target='_blank'),
        dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
        dict(name='iframe', attrs={'width':['590']}),
    ]

    remove_tags_after = [
        dict(target='_top'),
    ]

    # -- Sections: (title, RSS URL) --
    feeds = [
        ('All News','http://www.hani.co.kr/rss/'),
        ('Politics','http://www.hani.co.kr/rss/politics/'),
        ('Economy','http://www.hani.co.kr/rss/economy/'),
        ('Society','http://www.hani.co.kr/rss/society/'),
        ('International','http://www.hani.co.kr/rss/international/'),
        ('Culture','http://www.hani.co.kr/rss/culture/'),
        ('Sports','http://www.hani.co.kr/rss/sports/'),
        ('Science','http://www.hani.co.kr/rss/science/'),
        ('Opinion','http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon','http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition','http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection','http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly','http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly','http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani','http://www.hani.co.kr/rss/multihani/'),
        ('Lead','http://www.hani.co.kr/rss/lead/'),
        ('Newsrank','http://www.hani.co.kr/rss/newsrank/'),
    ]
|
26
recipes/hankyoreh21.recipe
Normal file
26
recipes/hankyoreh21.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download The Hankyoreh21 magazine
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Hankyoreh21(BasicNewsRecipe):
    '''Recipe that downloads Hankyoreh21 magazine articles from its RSS feed.'''

    # -- Publication metadata --
    title = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'

    # -- Download options --
    oldest_article = 20
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets = True
    remove_javascript = True

    # -- Article clean-up rules --
    keep_only_tags = [
        dict(name='font', attrs={'class':'t18bk'}),
        dict(id=['fontSzArea']),
    ]

    # -- Sections: (title, RSS URL) --
    # The URL previously ended with a stray space inside the string literal;
    # it is removed so the request targets the intended address.
    feeds = [
        ('Hani21','http://h21.hani.co.kr/rss/'),
    ]
|
18
recipes/korben.recipe
Normal file
18
recipes/korben.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
class BasicUserRecipe1318619728(AutomaticNewsRecipe):
    '''Automatic-cleanup recipe for the Korben feed, with a custom masthead.'''

    title = u'Korben'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    # (section title, feed URL)
    feeds = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')]

    def get_masthead_url(self):
        '''
        Return the masthead image URL, or None when it cannot be opened.

        The image is fetched once as an availability check. On any download
        error a message is logged and None is returned.
        '''
        masthead = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png'
        # Go through self: this works whether get_browser is defined as a
        # classmethod or as an instance method, and it does not rely on the
        # name BasicNewsRecipe, which this file never imports.
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
@ -1,36 +1,35 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download KoreaHerald
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class KoreaHerald(BasicNewsRecipe):
|
||||
title = u'KoreaHerald'
|
||||
language = 'en'
|
||||
description = u'Korea Herald News articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 10
|
||||
recursions = 3
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [
|
||||
dict(id=['contentLeft', '_article'])
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='iframe'),
|
||||
dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
|
||||
dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('All News','http://www.koreaherald.com/rss/020000000000.xml'),
|
||||
('National','http://www.koreaherald.com/rss/020100000000.xml'),
|
||||
('Business','http://www.koreaherald.com/rss/020200000000.xml'),
|
||||
('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
|
||||
('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
|
||||
('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
|
||||
('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
|
||||
('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
|
||||
]
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download KoreaHerald
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class KoreaHerald(BasicNewsRecipe):
    '''Recipe that downloads Korea Herald articles from its section RSS feeds.'''

    # -- Publication metadata --
    title = u'KoreaHerald'
    __author__ = 'Seongkyoun Yoo'
    description = u'Korea Herald News articles'
    language = 'en'

    # -- Download options --
    oldest_article = 15
    max_articles_per_feed = 15
    recursions = 3
    no_stylesheets = True

    # -- Article clean-up rules --
    keep_only_tags = [
        dict(id=['contentLeft', '_article']),
    ]

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
    ]

    # -- Sections: (title, RSS URL) --
    feeds = [
        ('National','http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
    ]
|
||||
|
@ -1,7 +1,7 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||
title = 'Kansascity Star'
|
||||
title = 'Kansas City Star'
|
||||
language = 'en'
|
||||
__author__ = 'TonytheBookworm'
|
||||
description = 'www.kansascity.com feed'
|
||||
|
37
recipes/kyungyhang
Normal file
37
recipes/kyungyhang
Normal file
@ -0,0 +1,37 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
Profile to download The Kyungyhang
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Kyungyhang(BasicNewsRecipe):
    '''Recipe that downloads Kyungyhang Shinmun articles from its RSS feed.'''

    # -- Publication metadata --
    title = u'Kyungyhang'
    __author__ = 'Seongkyoun Yoo'
    description = u'The Kyungyhang Shinmun articles'
    language = 'ko'

    # -- Download options --
    oldest_article = 20
    max_articles_per_feed = 20
    recursions = 2
    no_stylesheets = True
    remove_javascript = True

    # -- Article clean-up rules --
    keep_only_tags = [
        dict(name='div', attrs ={'class':['article_title_wrap']}),
        dict(name='div', attrs ={'class':['article_txt']}),
    ]

    remove_tags_after = dict(id={'sub_bottom'})

    remove_tags = [
        dict(name='iframe'),
        dict(id={'TdHot'}),
        dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}),
        dict(name='dl', attrs={'class':['CL']}),
        dict(name='ul', attrs={'class':['tab']}),
    ]

    # -- Sections: (title, RSS URL) --
    feeds = [
        ('All News','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
    ]
|
@ -1,32 +1,37 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version'
|
||||
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
|
||||
|
||||
'''
|
||||
http://www.repubblica.it/
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaRepubblica(BasicNewsRecipe):
|
||||
title = 'La Repubblica'
|
||||
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
|
||||
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
|
||||
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
|
||||
publisher = 'Gruppo editoriale L\'Espresso'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
language = 'it'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
oldest_article = 5
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
#recursion = 10
|
||||
no_stylesheets = True
|
||||
extra_css = """
|
||||
img{display: block}
|
||||
"""
|
||||
title = 'La Repubblica'
|
||||
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
|
||||
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
|
||||
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
|
||||
publisher = 'Gruppo editoriale L\'Espresso'
|
||||
category = 'News, politics, culture, economy, general interest'
|
||||
language = 'it'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
oldest_article = 5
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
publication_type = 'newspaper'
|
||||
articles_are_obfuscated = True
|
||||
temp_files = []
|
||||
extra_css = """
|
||||
img{display: block}
|
||||
"""
|
||||
|
||||
remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
|
||||
@ -35,11 +40,28 @@ class LaRepubblica(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
    '''
    Return the URL to download for a feed entry, without its query string.

    Starts from the URL chosen by BasicNewsRecipe.get_article_url. If that
    URL does not contain '.repubblica.it/', the entry's 'id' (falling back
    to 'guid') is used instead when it is set. Everything from the last '?'
    onwards is then dropped.

    A URL with no '?' is returned unchanged, and a missing URL is returned
    as-is (falsy) without raising.
    '''
    link = BasicNewsRecipe.get_article_url(self, article)
    if link and '.repubblica.it/' not in link:
        fallback = article.get('id', article.get('guid', None))
        if fallback:
            link = fallback
    if not link:
        return link
    # rpartition returns ('', '', link) when there is no '?', so taking [0]
    # unconditionally would turn such a URL into an empty string.
    head, sep, _tail = link.rpartition('?')
    return head if sep else link
|
||||
|
||||
def get_obfuscated_article(self, url):
    '''
    Download url into a persistent temporary file and return the file's path.

    The download is attempted up to 10 times. The temporary file object is
    appended to self.temp_files.

    Raises the error from the last attempt when every attempt fails.
    '''
    max_attempts = 10
    for attempt in range(max_attempts):
        try:
            html = self.browser.open(url).read()
            break
        except Exception:
            if attempt == max_attempts - 1:
                # Out of retries: surface the real download error instead of
                # failing later on an unbound `html`.
                raise
            self.log("Retrying download...")

    tmp = PersistentTemporaryFile('_fa.html')
    self.temp_files.append(tmp)
    try:
        tmp.write(html)
    finally:
        # Close the handle even if the write fails.
        tmp.close()
    return tmp.name
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'class':'articolo'}),
|
||||
dict(attrs={'class':'body-text'}),
|
||||
@ -49,7 +71,7 @@ class LaRepubblica(BasicNewsRecipe):
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','meta']),
|
||||
dict(name=['object','link','meta','iframe','embed']),
|
||||
dict(name='span',attrs={'class':'linkindice'}),
|
||||
dict(name='div', attrs={'class':'bottom-mobile'}),
|
||||
dict(name='div', attrs={'id':['rssdiv','blocco']}),
|
||||
@ -80,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe):
|
||||
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Normalise non-standard tags and strip inline styles.

    Every 'hgroup', 'deresponsabilizzazione' and 'per' tag is renamed to
    'div' with its attributes cleared. The 'style' attribute is then removed
    from every tag that has one. Returns the same soup object.
    '''
    odd_tag_names = ['hgroup','deresponsabilizzazione','per']
    for odd_tag in soup.findAll(odd_tag_names):
        odd_tag.name = 'div'
        odd_tag.attrs = []
    for styled_tag in soup.findAll(style=True):
        del styled_tag['style']
    return soup
|
||||
|
||||
|
76
recipes/lepoint.recipe
Normal file
76
recipes/lepoint.recipe
Normal file
@ -0,0 +1,76 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
'''
|
||||
LePoint.fr
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class lepoint(BasicNewsRecipe):
    '''Recipe that builds a periodical from the LePoint.fr RSS feeds.'''

    # -- Publication metadata --
    title = 'Le Point'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'LePoint.fr'
    category = 'news, France, world'
    language = 'fr'

    # -- Download and formatting options --
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # Replacement styling, applied since the site's own stylesheets are dropped.
    extra_css = '''
                   h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                   .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
                   .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                   .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                   .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                '''

    # -- Article clean-up rules --
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['entete_chroniqueur']}),
        dict(name='div', attrs={'class':['col_article']}),
        dict(name='div', attrs={'class':['signature_article']}),
        dict(name='div', attrs={'class':['util_font util_article']}),
        dict(name='div', attrs={'class':['util_article bottom']})
    ]

    keep_only_tags = [dict(name='div', attrs={'class':['page_article']})]

    remove_tags_after = dict(name='div', attrs={'class':['util_article bottom']})

    # -- Sections: (title, RSS URL) --
    feeds = [
        (u'À la une', 'http://www.lepoint.fr/rss.xml'),
        ('International', 'http://www.lepoint.fr/monde/rss.xml'),
        ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
        ('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        # Section title spelling corrected (was u'Socièté').
        (u'Société', 'http://www.lepoint.fr/societe/rss.xml'),
        ('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
        (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
        ('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
        (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
        ('Sport', 'http://www.lepoint.fr/sport/rss.xml')
    ]

    def preprocess_html(self, soup):
        '''Remove the inline ``style`` attribute from every tag; return the soup.'''
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        '''
        Return the masthead image URL, or None when it cannot be opened.

        The image is fetched once as an availability check. On any download
        error a message is logged and None is returned.
        '''
        masthead = 'http://www.lepoint.fr/images/commun/logo.png'
        # Go through self: this works whether get_browser is defined as a
        # classmethod or as an instance method, and honours subclass overrides.
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
74
recipes/lexpress.recipe
Normal file
74
recipes/lexpress.recipe
Normal file
@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
'''
|
||||
Lexpress.fr
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class lepoint(BasicNewsRecipe):
    '''Recipe that builds a periodical from the LExpress.fr RSS feeds.'''
    # NOTE(review): the class name duplicates the one used in lepoint.recipe;
    # it is left unchanged here because the name is this block's identifier.

    # -- Publication metadata --
    title = 'L\'express'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'cp1252'
    publisher = 'LExpress.fr'
    category = 'Actualité, France, Monde'
    language = 'fr'

    # -- Download and formatting options --
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # Replacement styling, applied since the site's own stylesheets are dropped.
    # The last rule previously read "font-weiht", which is not a CSS property.
    extra_css = '''
                   h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
                   .current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
                   #contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
                   .entete { font-weight:bold;}
                '''

    # -- Article clean-up rules --
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['barre-outil-fb']}),
        dict(name='div', attrs={'class':['barre-outils']}),
        dict(id='bloc-sommaire'),
        dict(id='footer-article')
    ]

    keep_only_tags = [dict(name='div', attrs={'class':['bloc-article']})]

    remove_tags_after = dict(id='content-article')

    # -- Sections: (title, RSS URL) --
    feeds = [
        (u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'),
        ('International', 'http://www.lexpress.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'),
        (u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'),
        # Section title spelling corrected (was u'Envronnement').
        (u'Environnement', 'http://www.lexpress.fr/rss/environnement.xml'),
        # NOTE(review): this feed points at lepoint.fr, unlike every other
        # entry in this list -- presumably a copy/paste leftover. Confirm the
        # correct L'Express economy feed URL before changing it.
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        # Section title spelling corrected (was u'Socièté').
        (u'Société', 'http://www.lexpress.fr/rss/societe.xml'),
        ('Politique', 'http://www.lexpress.fr/rss/politique.xml'),
        (u'Médias', 'http://www.lexpress.fr/rss/medias.xml'),
        ('Culture', 'http://www.lexpress.fr/rss/culture.xml'),
        ('Sport', 'http://www.lexpress.fr/rss/sport.xml')
    ]

    def preprocess_html(self, soup):
        '''Remove the inline ``style`` attribute from every tag; return the soup.'''
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        '''
        Return the masthead image URL, or None when it cannot be opened.

        The image is fetched once as an availability check. On any download
        error a message is logged and None is returned.
        '''
        masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif'
        # Go through self: this works whether get_browser is defined as a
        # classmethod or as an instance method, and honours subclass overrides.
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
@ -9,39 +9,72 @@ liberation.fr
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Liberation(BasicNewsRecipe):
|
||||
|
||||
title = u'Liberation'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from France'
|
||||
language = 'fr'
|
||||
__author__ = 'calibre'
|
||||
description = 'Actualités'
|
||||
category = 'Actualités, France, Monde'
|
||||
language = 'fr'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
html2lrf_options = ['--base-font-size', '10']
|
||||
extra_css = '''
|
||||
h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
|
||||
h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
.mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1')
|
||||
#,dict(name='div', attrs={'class':'object-content text text-item'})
|
||||
,dict(name='div', attrs={'class':'article'})
|
||||
#,dict(name='div', attrs={'class':'articleContent'})
|
||||
,dict(name='div', attrs={'class':'entry'})
|
||||
]
|
||||
remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
|
||||
dict(name='div', attrs={'class':'article'})
|
||||
,dict(name='div', attrs={'class':'text-article m-bot-s1'})
|
||||
,dict(name='div', attrs={'class':'entry'})
|
||||
,dict(name='div', attrs={'class':'col_contenu'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
|
||||
,dict(name='p',attrs={'class':['chapo']})
|
||||
,dict(id='_twitter_facebook')
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='p', attrs={'class':'clear'})
|
||||
,dict(name='ul', attrs={'class':'floatLeft clear'})
|
||||
,dict(name='div', attrs={'class':'clear floatRight'})
|
||||
,dict(name='object')
|
||||
,dict(name='div', attrs={'class':'toolbox'})
|
||||
,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
|
||||
#,dict(name='div', attrs={'class':'clear block block-call-items'})
|
||||
,dict(name='div', attrs={'class':'block-content'})
|
||||
dict(name='iframe')
|
||||
,dict(name='a', attrs={'class':'lnk-comments'})
|
||||
,dict(name='div', attrs={'class':'toolbox'})
|
||||
,dict(name='ul', attrs={'class':'share-box'})
|
||||
,dict(name='ul', attrs={'class':'tool-box'})
|
||||
,dict(name='ul', attrs={'class':'rub'})
|
||||
,dict(name='p',attrs={'class':['chapo']})
|
||||
,dict(name='p',attrs={'class':['tag']})
|
||||
,dict(name='div',attrs={'class':['blokLies']})
|
||||
,dict(name='div',attrs={'class':['alire']})
|
||||
,dict(id='_twitter_facebook')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'La une', u'http://www.liberation.fr/rss/laune')
|
||||
,(u'Monde' , u'http://www.liberation.fr/rss/monde')
|
||||
,(u'Sports', u'http://www.liberation.fr/rss/sports')
|
||||
(u'La une', u'http://rss.liberation.fr/rss/9/')
|
||||
,(u'Monde' , u'http://www.liberation.fr/rss/10/')
|
||||
,(u'Économie', u'http://www.liberation.fr/rss/13/')
|
||||
,(u'Politiques', u'http://www.liberation.fr/rss/11/')
|
||||
,(u'Société', u'http://www.liberation.fr/rss/12/')
|
||||
,(u'Cinéma', u'http://www.liberation.fr/rss/58/')
|
||||
,(u'Écran', u'http://www.liberation.fr/rss/53/')
|
||||
,(u'Sports', u'http://www.liberation.fr/rss/12/')
|
||||
]
|
||||
|
||||
def get_masthead_url(self):
    '''
    Return the URL of the masthead image, or None if it cannot be fetched.

    The URL is opened once as an availability probe; the image data itself
    is not used here.
    '''
    masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(masthead)
    except Exception:
        # Best effort: any fetch failure simply means "no masthead".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the download can still be aborted.
        self.log("\nCover unavailable")
        masthead = None
    return masthead
|
||||
|
@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
|
||||
publication_type = 'newspaper'
|
||||
delay = 1
|
||||
remove_empty_feeds = True
|
||||
cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg')
|
||||
cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg')
|
||||
masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
|
||||
extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em}
|
||||
|
@ -4,26 +4,27 @@ __copyright__ = '2010-2011, Eddie Lau'
|
||||
# Region - Hong Kong, Vancouver, Toronto
|
||||
__Region__ = 'Hong Kong'
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
# please replace the following "True" with "False". (Default: True)
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to True if your device supports display of CJK titles
|
||||
# Turn below to True if your device supports display of CJK titles (Default: False)
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images
|
||||
# Set it to False if you want to skip images (Default: True)
|
||||
__KeepImages__ = True
|
||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
|
||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
|
||||
__UseLife__ = True
|
||||
# (HK only) It is to disable the column section which is now a premium content
|
||||
__InclCols__ = False
|
||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
|
||||
__ParsePFF__ = False
|
||||
# (HK only) Turn below to True if you wish hi-res images
|
||||
# (HK only) It is to disable premium content (Default: False)
|
||||
__InclPremium__ = False
|
||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
|
||||
__ParsePFF__ = True
|
||||
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
||||
__HiResImg__ = False
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
||||
2011/10/04: option to get hi-res photos for the articles
|
||||
2011/09/21: fetching "column" section is made optional.
|
||||
2011/09/21: fetching "column" section is made optional.
|
||||
2011/09/18: parse "column" section stuff from source text file directly.
|
||||
2011/09/07: disable "column" section as it is no longer offered free.
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
@ -72,7 +73,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
dict(attrs={'class':['content']}), # for content from txt
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
|
||||
dict(attrs={'class':['images']}) # for images from txt
|
||||
]
|
||||
if __KeepImages__:
|
||||
@ -208,18 +209,21 @@ class MPRecipe(BasicNewsRecipe):
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if __InclPremium__ == True:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
else:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclCols__ == True:
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
@ -253,10 +257,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
|
||||
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
# articles = self.parse_section(url)
|
||||
@ -270,18 +274,18 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclCols__ == True:
|
||||
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
@ -333,7 +337,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
# replace the url to the print-friendly version
|
||||
if __ParsePFF__ == True:
|
||||
if url.rfind('Redirect') <> -1:
|
||||
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
|
||||
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
|
||||
url = re.sub('%2F.*%2F', '/', url)
|
||||
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
|
||||
@ -349,6 +353,8 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
# parse from life.mingpao.com
|
||||
def parse_section2(self, url, keystr):
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
@ -359,9 +365,13 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
try:
|
||||
br.open_novisit(url)
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
except:
|
||||
print 'skipping a premium article'
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
@ -382,7 +392,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
|
||||
# parse from www.mingpaovan.com
|
||||
def parse_section3(self, url, baseUrl):
|
||||
self.get_fetchdate()
|
||||
@ -470,23 +480,23 @@ class MPRecipe(BasicNewsRecipe):
|
||||
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
|
||||
if __HiResImg__ == True:
|
||||
# TODO: add a _ in front of an image url
|
||||
if url.rfind('news.mingpao.com') > -1:
|
||||
if url.rfind('news.mingpao.com') > -1:
|
||||
imglist = re.findall('src="?.*?jpg"', raw_html)
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
for img in imglist:
|
||||
gifimg = img.replace('jpg"', 'gif"')
|
||||
try:
|
||||
try:
|
||||
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
||||
raw_html = raw_html.replace(img, gifimg)
|
||||
except:
|
||||
except:
|
||||
# find the location of the first _
|
||||
pos = img.find('_')
|
||||
if pos > -1:
|
||||
# if found, insert _ after the first _
|
||||
newimg = img[0:pos] + '_' + img[pos:]
|
||||
raw_html = raw_html.replace(img, newimg)
|
||||
else:
|
||||
else:
|
||||
# if not found, insert _ after "
|
||||
raw_html = raw_html.replace(img[1:], '"_' + img[1:])
|
||||
elif url.rfind('life.mingpao.com') > -1:
|
||||
@ -510,7 +520,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
pos = img.rfind('/')
|
||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||
#print 'newimg: ', newimg
|
||||
raw_html = raw_html.replace(img, newimg)
|
||||
raw_html = raw_html.replace(img, newimg)
|
||||
if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
|
||||
return raw_html
|
||||
else:
|
||||
@ -549,10 +559,11 @@ class MPRecipe(BasicNewsRecipe):
|
||||
photo = photo.replace('class="photo"', '')
|
||||
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
|
||||
return new_raw_html + '</body></html>'
|
||||
else:
|
||||
else:
|
||||
# .txt based file
|
||||
splitter = re.compile(r'\n') # Match non-digits
|
||||
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
||||
next_is_mov_link = False
|
||||
next_is_img_txt = False
|
||||
title_started = False
|
||||
met_article_start_char = False
|
||||
@ -561,24 +572,35 @@ class MPRecipe(BasicNewsRecipe):
|
||||
met_article_start_char = True
|
||||
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||
else:
|
||||
if next_is_img_txt == False:
|
||||
if item.startswith('='):
|
||||
if next_is_img_txt == False and next_is_mov_link == False:
|
||||
item = item.strip()
|
||||
if item.startswith("=@"):
|
||||
next_is_mov_link = True
|
||||
elif item.startswith("=?"):
|
||||
next_is_img_txt = True
|
||||
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
|
||||
elif item.startswith('='):
|
||||
next_is_img_txt = True
|
||||
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
|
||||
else:
|
||||
if met_article_start_char == False:
|
||||
if title_started == False:
|
||||
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
||||
title_started = True
|
||||
if item <> '':
|
||||
if next_is_img_txt == False and met_article_start_char == False:
|
||||
if title_started == False:
|
||||
#print 'Title started at ', item
|
||||
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
||||
title_started = True
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '<p>\n'
|
||||
new_raw_html = new_raw_html + item + '<p>\n'
|
||||
else:
|
||||
next_is_img_txt = False
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
if next_is_mov_link == True:
|
||||
next_is_mov_link = False
|
||||
else:
|
||||
next_is_img_txt = False
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
return new_raw_html + '</div></body></html>'
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
@ -587,7 +609,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
for item in soup.findAll(stype=True):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
|
||||
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
@ -678,7 +700,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth, description=desc)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
|
18
recipes/omgubuntu.recipe
Normal file
18
recipes/omgubuntu.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
class BasicUserRecipe1318619832(AutomaticNewsRecipe):
    '''Recipe for the OmgUbuntu feed, with automatic article cleanup enabled.'''

    title = u'OmgUbuntu'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')]

    def get_masthead_url(self):
        '''
        Return the URL of the masthead image, or None if it cannot be fetched.

        The URL is opened once as an availability probe.
        '''
        masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: a failed fetch only disables the masthead.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
47
recipes/phoronix.recipe
Normal file
47
recipes/phoronix.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
|
||||
'''
|
||||
Fetch phoronix.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class cdnet(BasicNewsRecipe):
    '''Recipe fetching the Phoronix feed listed in ``feeds``.'''

    # --- Metadata -----------------------------------------------------
    title = 'Phoronix'
    __author__ = 'calibre'
    description = 'Actualités Phoronix'
    encoding = 'utf-8'
    publisher = 'Phoronix.com'
    category = 'news, IT, linux'
    language = 'en'

    # --- Fetch options ------------------------------------------------
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 25
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    # --- Styling applied to the downloaded articles -------------------
    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        '''

    # --- Page trimming ------------------------------------------------
    remove_tags = []

    remove_tags_before = dict(id='phxcms_content_phx')
    remove_tags_after = dict(name='div', attrs={'class':'KonaBody'})

    feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')]

    def preprocess_html(self, soup):
        '''Strip every inline ``style`` attribute from *soup* and return it.'''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
|
||||
|
@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class USAToday(BasicNewsRecipe):
|
||||
|
||||
title = 'USA Today'
|
||||
__author__ = 'Kovid Goyal'
|
||||
oldest_article = 1
|
||||
publication_type = 'newspaper'
|
||||
timefmt = ''
|
||||
max_articles_per_feed = 20
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
extra_css = '.headline {text-align: left;}\n \
|
||||
.byline {font-family: monospace; \
|
||||
text-align: left; \
|
||||
margin-bottom: 1em;}\n \
|
||||
.image {text-align: center;}\n \
|
||||
.caption {text-align: center; \
|
||||
font-size: smaller; \
|
||||
font-style: italic}\n \
|
||||
.credit {text-align: right; \
|
||||
margin-bottom: 0em; \
|
||||
font-size: smaller;}\n \
|
||||
.articleBody {text-align: left;}\n '
|
||||
#simultaneous_downloads = 1
|
||||
title = 'USA Today'
|
||||
__author__ = 'calibre'
|
||||
description = 'newspaper'
|
||||
encoding = 'utf-8'
|
||||
publisher = 'usatoday.com'
|
||||
category = 'news, usa'
|
||||
language = 'en'
|
||||
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
filterDuplicates = True
|
||||
|
||||
extra_css = '''
|
||||
h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||
#post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||
#post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
|
||||
|
||||
feeds = [
|
||||
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
||||
('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
|
||||
@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe):
|
||||
('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
|
||||
('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
|
||||
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
|
||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'story'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(attrs={'class':[
|
||||
'share',
|
||||
'reprints',
|
||||
'inline-h3',
|
||||
'info-extras',
|
||||
'info-extras rounded',
|
||||
'inset',
|
||||
'ppy-outer',
|
||||
'ppy-caption',
|
||||
'comments',
|
||||
@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe):
|
||||
'tags',
|
||||
'bottom-tools',
|
||||
'sponsoredlinks',
|
||||
'corrections'
|
||||
]}),
|
||||
dict(name='ul', attrs={'class':'inside-copy'}),
|
||||
dict(id=['pluck']),
|
||||
]
|
||||
dict(id=['updated']),
|
||||
dict(id=['post-date-updated'])
|
||||
]
|
||||
|
||||
|
||||
def get_masthead_url(self):
|
||||
|
68
recipes/zdnet.fr.recipe
Normal file
68
recipes/zdnet.fr.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||
|
||||
'''
|
||||
Fetch zdnet.fr
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class zdnet(BasicNewsRecipe):
    '''Recipe fetching the ZDNet.fr news feeds listed in ``feeds``.'''

    title = 'ZDNet.fr'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'ZDNet.fr'
    category = 'Actualité, Informatique, IT'
    language = 'fr'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .contentmetadata p {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        '''

    # Elements dropped from every article page.
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['toolbox']}),
        dict(name='div', attrs={'class':['clear clearfix']}),
        dict(id='emailtoafriend'),
        dict(id='storyaudio'),
        dict(id='fbtwContainer'),
        dict(name='h5')
    ]

    remove_tags_before = dict(id='leftcol')
    remove_tags_after = dict(id='content')

    feeds = [
        ('Informatique', 'http://www.zdnet.fr/feeds/rss/actualites/informatique/'),
        ('Internet', 'http://www.zdnet.fr/feeds/rss/actualites/internet/'),
        ('Telecom', 'http://www.zdnet.fr/feeds/rss/actualites/telecoms/')
    ]

    def preprocess_html(self, soup):
        '''Strip every inline ``style`` attribute from *soup* and return it.'''
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        '''
        Return the URL of the masthead image, or None if it cannot be fetched.

        The URL is opened once as an availability probe.
        '''
        masthead = 'http://www.zdnet.fr/images/base/logo.png'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: a failed fetch only disables the masthead.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
@ -1,7 +1,7 @@
|
||||
<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:html="http://www.w3.org/1999/xhtml"
|
||||
xmlns="http://www.w3.org/1999/xhtml"
|
||||
xmlns:rtf="http://rtf2xml.sourceforge.net/"
|
||||
xmlns:c="calibre"
|
||||
extension-element-prefixes="c"
|
||||
@ -63,11 +63,16 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name = "para">
|
||||
<xsl:if test = "normalize-space(.) or child::*">
|
||||
<xsl:element name = "p">
|
||||
<xsl:call-template name = "para-content"/>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:element name = "p">
|
||||
<xsl:choose>
|
||||
<xsl:when test = "normalize-space(.) or child::*">
|
||||
<xsl:call-template name = "para-content"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:text> </xsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name = "para_off">
|
||||
@ -149,7 +154,7 @@
|
||||
<xsl:template match="rtf:doc-information" mode="header">
|
||||
<link rel="stylesheet" type="text/css" href="styles.css"/>
|
||||
<xsl:if test="not(rtf:title)">
|
||||
<title>unamed</title>
|
||||
<title>unnamed</title>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
@ -445,7 +450,10 @@
|
||||
|
||||
<xsl:template match = "rtf:field[@type='hyperlink']">
|
||||
<xsl:element name ="a">
|
||||
<xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute>
|
||||
<xsl:attribute name = "href">
|
||||
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
|
||||
<xsl:value-of select = "@link"/>
|
||||
</xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
@ -49,6 +49,15 @@ class ANDROID(USBMS):
|
||||
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
|
||||
0x70c6 : [0x226]
|
||||
},
|
||||
# Freescale
|
||||
0x15a2 : {
|
||||
0x0c01 : [0x226]
|
||||
},
|
||||
|
||||
# Alcatel
|
||||
0x05c6 : {
|
||||
0x9018 : [0x0226],
|
||||
},
|
||||
|
||||
# Sony Ericsson
|
||||
0xfce : {
|
||||
@ -139,7 +148,8 @@ class ANDROID(USBMS):
|
||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
|
||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
|
||||
'VIZIO', 'GOOGLE', 'FREESCAL']
|
||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
@ -150,7 +160,7 @@ class ANDROID(USBMS):
|
||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
||||
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
|
||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
|
@ -62,7 +62,7 @@ class DevicePlugin(Plugin):
|
||||
#: Icon for this device
|
||||
icon = I('reader.png')
|
||||
|
||||
# Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
|
||||
# Encapsulates an annotation fetched from the device
|
||||
UserAnnotation = namedtuple('Annotation','type, value')
|
||||
|
||||
#: GUI displays this as a message if not None. Useful if opening can take a
|
||||
|
@ -13,6 +13,8 @@ import datetime, os, re, sys, json, hashlib
|
||||
from calibre.devices.kindle.apnx import APNXBuilder
|
||||
from calibre.devices.kindle.bookmark import Bookmark
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre import strftime
|
||||
|
||||
'''
|
||||
Notes on collections:
|
||||
@ -164,6 +166,121 @@ class KINDLE(USBMS):
|
||||
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
|
||||
return bookmarked_books
|
||||
|
||||
def generate_annotation_html(self, bookmark):
    '''
    Render a bookmark's last-read position and user notes as HTML.

    Returns a BeautifulSoup document holding a single
    <div class="user_annotations"> ... </div> element.
    '''
    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString

    last_read_location = bookmark.last_read_location
    timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
    percent_read = bookmark.percent_read
    is_pdf = bookmark.book_format == 'pdf'

    ka_soup = BeautifulSoup()
    container = Tag(ka_soup, 'div')
    container['class'] = 'user_annotations'

    # Bold header: date followed by the last-read page (PDF) or location.
    if is_pdf:
        header_template = _("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)")
    else:
        header_template = _("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)")
    header = Tag(ka_soup, 'span')
    header['style'] = 'font-weight:bold'
    header.insert(0, NavigableString(header_template % dict(
        time=strftime(u'%x', timestamp.timetuple()),
        loc=last_read_location,
        pr=percent_read)))

    # Children of the container, in final display order.
    children = [header, Tag(ka_soup, 'br')]

    if bookmark.user_notes:
        user_notes = bookmark.user_notes
        # Annotations are emitted sorted by location.
        for location in sorted(user_notes):
            note = user_notes[location]
            fields = dict(dl=note['displayed_location'], typ=note['type'])
            if note['text']:
                # Highlighted text is italicized; typed notes are left as is.
                if note['type'] == 'Note':
                    fields['text'] = note['text']
                else:
                    fields['text'] = '<i>%s</i>' % note['text']
                entry = _('<b>Location %(dl)d • %(typ)s</b><br />%(text)s<br />') % fields
            elif is_pdf:
                entry = _('<b>Page %(dl)d • %(typ)s</b><br />') % fields
            else:
                entry = _('<b>Location %(dl)d • %(typ)s</b><br />') % fields
            children.append(entry)

    for position, child in enumerate(children):
        container.insert(position, child)

    ka_soup.insert(0, container)
    return ka_soup
|
||||
|
||||
|
||||
def add_annotation_to_library(self, db, db_id, annotation):
    '''
    Store a Kindle annotation in the calibre library.

    For a 'kindle_bookmark' annotation the rendered notes replace any
    previously appended notes in the book's comments and the bookmark file
    is attached to book ``db_id``. For a 'kindle_clippings' annotation the
    clippings file is attached to the "My Clippings" book, which is created
    when the search finds none.
    '''
    from calibre.ebooks.BeautifulSoup import Tag
    skip_tags = set(['Catalog', 'Clippings'])

    if annotation.type == 'kindle_bookmark':
        bookmark = annotation.value
        mi = db.get_metadata(db_id, index_is_id=True)
        notes_soup = self.generate_annotation_html(bookmark)
        if not mi.comments:
            mi.comments = unicode(notes_soup.prettify())
        else:
            # Both offsets are looked up in the untouched comments, then
            # everything from an earlier annotation block onwards is cut.
            div_at = mi.comments.find('<div class="user_annotations">')
            divider_at = mi.comments.find('<hr class="annotations_divider" />')
            if div_at >= 0:
                mi.comments = mi.comments[:div_at]
            if divider_at >= 0:
                mi.comments = mi.comments[:divider_at]
            if set(mi.tags).intersection(skip_tags):
                return
            if mi.comments:
                # Separate the remaining comments from the notes.
                divider = Tag(notes_soup, 'hr')
                divider['class'] = 'annotations_divider'
                notes_soup.insert(0, divider)
            mi.comments = mi.comments + unicode(notes_soup.prettify())

        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        db.add_format_with_hooks(db_id, bookmark.bookmark_extension,
                                 bookmark.path, index_is_id=True)
        return

    if annotation.type == 'kindle_clippings':
        clippings = annotation.value
        # Find 'My Clippings' author=Kindle in database, or add
        last_update = 'Last modified %s' % strftime(u'%x %X', clippings['timestamp'].timetuple())
        matches = list(db.data.search_getting_ids('title:"My Clippings"', ''))
        if not matches:
            mi = MetaInformation('My Clippings', authors=['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([clippings['path']], ['txt'], [mi])
        else:
            book_id = matches[0]
            db.add_format_with_hooks(book_id, 'TXT', clippings['path'],
                                     index_is_id=True)
            mi = db.get_metadata(book_id, index_is_id=True)
            mi.comments = last_update
            db.set_metadata(book_id, mi)
|
||||
|
||||
class KINDLE2(KINDLE):
|
||||
|
||||
|
@ -16,6 +16,7 @@ from calibre.devices.usbms.driver import USBMS, debug_print
|
||||
from calibre import prints
|
||||
from calibre.devices.usbms.books import CollectionsBookList
|
||||
from calibre.utils.magick.draw import save_cover_data_to
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class KOBO(USBMS):
|
||||
|
||||
@ -76,6 +77,11 @@ class KOBO(USBMS):
|
||||
self.book_class = Book
|
||||
self.dbversion = 7
|
||||
|
||||
def create_annotations_path(self, mdata, device_path=None):
    '''
    Return ``device_path`` when one is given (truthy); otherwise fall back
    to the path computed by USBMS.create_annotations_path for ``mdata``.
    '''
    return device_path or USBMS.create_annotations_path(self, mdata)
|
||||
|
||||
def books(self, oncard=None, end_session=True):
|
||||
from calibre.ebooks.metadata.meta import path_to_ext
|
||||
|
||||
@ -750,9 +756,12 @@ class KOBO(USBMS):
|
||||
|
||||
blists = {}
|
||||
for i in paths:
|
||||
if booklists[i] is not None:
|
||||
#debug_print('Booklist: ', i)
|
||||
blists[i] = booklists[i]
|
||||
try:
|
||||
if booklists[i] is not None:
|
||||
#debug_print('Booklist: ', i)
|
||||
blists[i] = booklists[i]
|
||||
except IndexError:
|
||||
pass
|
||||
opts = self.settings()
|
||||
if opts.extra_customization:
|
||||
collections = [x.lower().strip() for x in
|
||||
@ -865,3 +874,21 @@ class KOBO(USBMS):
|
||||
else:
|
||||
debug_print("ImageID could not be retreived from the database")
|
||||
|
||||
def prepare_addable_books(self, paths):
    '''
    The Kobo supports an encrypted epub referred to as a kepub.
    Unfortunately Kobo decided to put the files on the device
    with no file extension. I just hope that decision causes
    them as much grief as it does me :-)

    This has to make a temporary copy of the book files with an
    epub extension to allow Calibre's normal processing to
    deal with the file appropriately.

    ``paths`` is modified in place (entries whose path contains 'kepub'
    are replaced by the name of the temporary copy) and also returned.
    '''
    for idx, path in enumerate(paths):
        if 'kepub' in path:
            # Book files are binary: text mode would mangle the bytes on
            # platforms that translate line endings.
            with closing(open(path, 'rb')) as r:
                tf = PersistentTemporaryFile(suffix='.epub')
                try:
                    tf.write(r.read())
                finally:
                    # Close explicitly so the copy is fully flushed to
                    # disk before its name is handed to the caller.
                    tf.close()
                paths[idx] = tf.name
    return paths
|
||||
|
@ -1068,6 +1068,12 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
'''
|
||||
return {}
|
||||
|
||||
def add_annotation_to_library(self, db, db_id, annotation):
    '''
    Add an annotation to the calibre library.

    This base implementation does nothing and returns None.
    '''
    return None
|
||||
|
||||
def create_upload_path(self, path, mdata, fname, create_dirs=True):
|
||||
path = os.path.abspath(path)
|
||||
maxlen = self.MAX_PATH_LEN
|
||||
@ -1147,3 +1153,6 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
os.makedirs(filedir)
|
||||
|
||||
return filepath
|
||||
|
||||
def create_annotations_path(self, mdata, device_path=None):
    '''
    Build the annotation path for the book described by ``mdata``.

    The result is whatever ``create_upload_path`` returns for the file
    name ``x.bookmark`` under the ``/<storage>`` placeholder root, with
    directory creation disabled. ``device_path`` is accepted but not
    used by this implementation.
    '''
    storage_root = os.path.abspath('/<storage>')
    return self.create_upload_path(storage_root, mdata, 'x.bookmark',
            create_dirs=False)
|
||||
|
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
|
||||
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
|
||||
from calibre.ebooks.chm.reader import CHMReader
|
||||
log.debug('Opening CHM file')
|
||||
rdr = CHMReader(chm_path, log, self.opts)
|
||||
rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
|
||||
log.debug('Extracting CHM to %s' % output_dir)
|
||||
rdr.extract_content(output_dir, debug_dump=debug_dump)
|
||||
self._chm_reader = rdr
|
||||
|
@ -40,14 +40,14 @@ class CHMError(Exception):
|
||||
pass
|
||||
|
||||
class CHMReader(CHMFile):
|
||||
def __init__(self, input, log, opts):
|
||||
def __init__(self, input, log, input_encoding=None):
|
||||
CHMFile.__init__(self)
|
||||
if isinstance(input, unicode):
|
||||
input = input.encode(filesystem_encoding)
|
||||
if not self.LoadCHM(input):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
self.log = log
|
||||
self.opts = opts
|
||||
self.input_encoding = input_encoding
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
@ -156,8 +156,8 @@ class CHMReader(CHMFile):
|
||||
break
|
||||
|
||||
def _reformat(self, data, htmlpath):
|
||||
if self.opts.input_encoding:
|
||||
data = data.decode(self.opts.input_encoding)
|
||||
if self.input_encoding:
|
||||
data = data.decode(self.input_encoding)
|
||||
try:
|
||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||
soup = BeautifulSoup(data)
|
||||
|
@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
|
||||
def unarchive(self, path, tdir):
|
||||
extract(path, tdir)
|
||||
files = list(walk(tdir))
|
||||
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
|
||||
for f in files]
|
||||
from calibre.customize.ui import available_input_formats
|
||||
fmts = available_input_formats()
|
||||
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
|
||||
|
@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin):
|
||||
html = 'index.xhtml'
|
||||
with open(html, 'wb') as f:
|
||||
res = transform.tostring(result)
|
||||
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||
#clean multiple \n
|
||||
res = re.sub('\n+', '\n', res)
|
||||
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
||||
res = re.sub('\s*<body>', '<body>', res)
|
||||
res = re.sub('(?<=\n)\n{2}',
|
||||
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||
# res = re.sub('\s*<body>', '<body>', res)
|
||||
# res = re.sub('(?<=\n)\n{2}',
|
||||
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||
f.write(res)
|
||||
self.write_inline_css(inline_class, border_styles)
|
||||
stream.seek(0)
|
||||
|
@ -376,13 +376,13 @@ class ParseRtf:
|
||||
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
||||
raise RtfInvalidCodeException, msg
|
||||
if self.__run_level > 1:
|
||||
sys.stderr.write(_('File could be older RTF...\n'))
|
||||
sys.stderr.write('File could be older RTF...\n')
|
||||
if found_destination:
|
||||
if self.__run_level > 1:
|
||||
sys.stderr.write(_(
|
||||
sys.stderr.write(
|
||||
'File also has newer RTF.\n'
|
||||
'Will do the best to convert.\n'
|
||||
))
|
||||
)
|
||||
add_brackets_obj = add_brackets.AddBrackets(
|
||||
in_file = self.__temp_file,
|
||||
bug_handler = RtfInvalidCodeException,
|
||||
|
@ -11,11 +11,11 @@
|
||||
# #
|
||||
# #
|
||||
#########################################################################
|
||||
import sys, os, tempfile
|
||||
import sys, os, tempfile
|
||||
|
||||
from calibre.ebooks.rtf2xml import copy, check_brackets
|
||||
# note to self. This is the first module in which I use tempfile. A good idea?
|
||||
"""
|
||||
"""
|
||||
|
||||
class AddBrackets:
|
||||
"""
|
||||
Add brackets for old RTF.
|
||||
@ -41,6 +41,7 @@ class AddBrackets:
|
||||
self.__copy = copy
|
||||
self.__write_to = tempfile.mktemp()
|
||||
self.__run_level = run_level
|
||||
|
||||
def __initiate_values(self):
|
||||
"""
|
||||
"""
|
||||
@ -82,14 +83,16 @@ class AddBrackets:
|
||||
'cw<ci<subscript_' ,
|
||||
'cw<ci<superscrip',
|
||||
'cw<ci<underlined' ,
|
||||
'cw<ul<underlined' ,
|
||||
# 'cw<ul<underlined' ,
|
||||
]
|
||||
|
||||
def __before_body_func(self, line):
|
||||
"""
|
||||
"""
|
||||
if self.__token_info == 'mi<mk<body-open_':
|
||||
self.__state = 'in_body'
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __in_body_func(self, line):
|
||||
"""
|
||||
"""
|
||||
@ -108,6 +111,7 @@ class AddBrackets:
|
||||
self.__state = 'after_control_word'
|
||||
else:
|
||||
self.__write_obj.write(line)
|
||||
|
||||
def __after_control_word_func(self, line):
|
||||
"""
|
||||
"""
|
||||
@ -122,6 +126,7 @@ class AddBrackets:
|
||||
self.__ignore_count = self.__ob_count
|
||||
else:
|
||||
self.__state = 'in_body'
|
||||
|
||||
def __write_group(self):
|
||||
"""
|
||||
"""
|
||||
@ -141,6 +146,7 @@ class AddBrackets:
|
||||
self.__write_obj.write(inline_string)
|
||||
self.__open_bracket = 1
|
||||
self.__temp_group = []
|
||||
|
||||
def __change_permanent_group(self):
|
||||
"""
|
||||
use temp group to change permanent group
|
||||
@ -150,6 +156,7 @@ class AddBrackets:
|
||||
if token_info in self.__accept:
|
||||
att = line[20:-1]
|
||||
self.__inline[token_info] = att
|
||||
|
||||
def __ignore_func(self, line):
|
||||
"""
|
||||
Don't add any brackets while inside of brackets RTF has already
|
||||
@ -159,12 +166,14 @@ class AddBrackets:
|
||||
if self.__token_info == 'cb<nu<clos-brack'and\
|
||||
self.__cb_count == self.__ignore_count:
|
||||
self.__state = 'in_body'
|
||||
|
||||
def __check_brackets(self, in_file):
    """
    Run the bracket checker over in_file.

    The checker object is kept on the instance. Returns 1 when the first
    element of the checker's result is falsy, and None otherwise.
    """
    checker = check_brackets.CheckBrackets(file = in_file)
    self.__check_brack_obj = checker
    brackets_ok = checker.check_brackets()[0]
    if brackets_ok:
        return None
    return 1
|
||||
|
||||
def add_brackets(self):
|
||||
"""
|
||||
"""
|
||||
|
@ -5,14 +5,57 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, datetime
|
||||
|
||||
from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
|
||||
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
|
||||
from calibre import strftime
|
||||
from calibre.gui2.actions import InterfaceAction
|
||||
from calibre.devices.usbms.device import Device
|
||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||
|
||||
class Updater(QThread): # {{{
    '''
    Thread that asks the device driver to add each fetched annotation to
    the library, reporting progress through a modal progress dialog.

    Errors raised for individual books are collected in ``self.errors``
    (book id -> formatted traceback) and passed to ``done_callback``
    together with the ids from the annotation map.
    '''

    update_progress = pyqtSignal(int)
    update_done = pyqtSignal()

    def __init__(self, parent, db, device, annotation_map, done_callback):
        QThread.__init__(self, parent)
        self.db = db
        self.device = device
        self.annotation_map = annotation_map
        self.done_callback = done_callback
        self.errors = {}
        # Cleared by canceled() to stop the loop in run()
        self.keep_going = True

        total = len(annotation_map)
        self.pd = ProgressDialog(_('Merging user annotations into database'),
                '', 0, total, parent=parent)
        self.pd.canceled_signal.connect(self.canceled)
        self.pd.setModal(True)
        self.pd.show()

        # Queued connections: the signals are emitted from run()
        self.update_progress.connect(self.pd.set_value,
                type=Qt.QueuedConnection)
        self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)

    def canceled(self):
        '''Stop processing further annotations and hide the dialog.'''
        self.keep_going = False
        self.pd.hide()

    def run(self):
        '''Process every entry of the annotation map until canceled.'''
        import traceback
        for count, book_id in enumerate(self.annotation_map):
            if not self.keep_going:
                break
            entry = self.annotation_map[book_id]
            annotation = Device.UserAnnotation(entry[0], entry[1])
            try:
                self.device.add_annotation_to_library(self.db, book_id,
                        annotation)
            except:
                # Best effort: remember the failure and carry on with
                # the remaining books
                self.errors[book_id] = traceback.format_exc()
            self.update_progress.emit(count)
        self.update_done.emit()
        self.done_callback(self.annotation_map.keys(), self.errors)

# }}}
|
||||
|
||||
class FetchAnnotationsAction(InterfaceAction):
|
||||
|
||||
@ -41,13 +84,21 @@ class FetchAnnotationsAction(InterfaceAction):
|
||||
fmts.append(format.lower())
|
||||
return fmts
|
||||
|
||||
def get_device_path_from_id(id_):
    # Collect the device books matching this library id from the main
    # memory and both card views, then return the path of the first
    # match, or None when the book is not on the device.
    matches = []
    for location in ('memory', 'card_a', 'card_b'):
        model = getattr(self.gui, location+'_view').model()
        found = model.paths_for_db_ids(set([id_]), as_map=True)
        matches += found[id_]
    if not matches:
        return None
    return matches[0].path
|
||||
|
||||
def generate_annotation_paths(ids, db, device):
|
||||
# Generate path templates
|
||||
# Individual storage mount points scanned/resolved in driver.get_annotations()
|
||||
path_map = {}
|
||||
for id in ids:
|
||||
path = get_device_path_from_id(id)
|
||||
mi = db.get_metadata(id, index_is_id=True)
|
||||
a_path = device.create_upload_path(os.path.abspath('/<storage>'), mi, 'x.bookmark', create_dirs=False)
|
||||
a_path = device.create_annotations_path(mi, device_path=path)
|
||||
path_map[id] = dict(path=a_path, fmts=get_formats(id))
|
||||
return path_map
|
||||
|
||||
@ -78,166 +129,6 @@ class FetchAnnotationsAction(InterfaceAction):
|
||||
path_map)
|
||||
|
||||
def annotations_fetched(self, job):
|
||||
from calibre.devices.usbms.device import Device
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||
from calibre.library.cli import do_add_format
|
||||
|
||||
class Updater(QThread): # {{{
|
||||
|
||||
update_progress = pyqtSignal(int)
|
||||
update_done = pyqtSignal()
|
||||
FINISHED_READING_PCT_THRESHOLD = 96
|
||||
|
||||
def __init__(self, parent, db, annotation_map, done_callback):
|
||||
QThread.__init__(self, parent)
|
||||
self.db = db
|
||||
self.pd = ProgressDialog(_('Merging user annotations into database'), '',
|
||||
0, len(job.result), parent=parent)
|
||||
|
||||
self.am = annotation_map
|
||||
self.done_callback = done_callback
|
||||
self.pd.canceled_signal.connect(self.canceled)
|
||||
self.pd.setModal(True)
|
||||
self.pd.show()
|
||||
self.update_progress.connect(self.pd.set_value,
|
||||
type=Qt.QueuedConnection)
|
||||
self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)
|
||||
|
||||
def generate_annotation_html(self, bookmark):
|
||||
# Returns <div class="user_annotations"> ... </div>
|
||||
last_read_location = bookmark.last_read_location
|
||||
timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
|
||||
percent_read = bookmark.percent_read
|
||||
|
||||
ka_soup = BeautifulSoup()
|
||||
dtc = 0
|
||||
divTag = Tag(ka_soup,'div')
|
||||
divTag['class'] = 'user_annotations'
|
||||
|
||||
# Add the last-read location
|
||||
spanTag = Tag(ka_soup, 'span')
|
||||
spanTag['style'] = 'font-weight:bold'
|
||||
if bookmark.book_format == 'pdf':
|
||||
spanTag.insert(0,NavigableString(
|
||||
_("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)") % \
|
||||
dict(time=strftime(u'%x', timestamp.timetuple()),
|
||||
loc=last_read_location,
|
||||
pr=percent_read)))
|
||||
else:
|
||||
spanTag.insert(0,NavigableString(
|
||||
_("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)") % \
|
||||
dict(time=strftime(u'%x', timestamp.timetuple()),
|
||||
loc=last_read_location,
|
||||
pr=percent_read)))
|
||||
|
||||
divTag.insert(dtc, spanTag)
|
||||
dtc += 1
|
||||
divTag.insert(dtc, Tag(ka_soup,'br'))
|
||||
dtc += 1
|
||||
|
||||
if bookmark.user_notes:
|
||||
user_notes = bookmark.user_notes
|
||||
annotations = []
|
||||
|
||||
# Add the annotations sorted by location
|
||||
# Italicize highlighted text
|
||||
for location in sorted(user_notes):
|
||||
if user_notes[location]['text']:
|
||||
annotations.append(
|
||||
_('<b>Location %(dl)d • %(typ)s</b><br />%(text)s<br />') % \
|
||||
dict(dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type'],
|
||||
text=(user_notes[location]['text'] if \
|
||||
user_notes[location]['type'] == 'Note' else \
|
||||
'<i>%s</i>' % user_notes[location]['text'])))
|
||||
else:
|
||||
if bookmark.book_format == 'pdf':
|
||||
annotations.append(
|
||||
_('<b>Page %(dl)d • %(typ)s</b><br />') % \
|
||||
dict(dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type']))
|
||||
else:
|
||||
annotations.append(
|
||||
_('<b>Location %(dl)d • %(typ)s</b><br />') % \
|
||||
dict(dl=user_notes[location]['displayed_location'],
|
||||
typ=user_notes[location]['type']))
|
||||
|
||||
for annotation in annotations:
|
||||
divTag.insert(dtc, annotation)
|
||||
dtc += 1
|
||||
|
||||
ka_soup.insert(0,divTag)
|
||||
return ka_soup
|
||||
|
||||
'''
|
||||
def mark_book_as_read(self,id):
|
||||
read_tag = gprefs.get('catalog_epub_mobi_read_tag')
|
||||
if read_tag:
|
||||
self.db.set_tags(id, [read_tag], append=True)
|
||||
'''
|
||||
|
||||
def canceled(self):
|
||||
self.pd.hide()
|
||||
|
||||
def run(self):
|
||||
ignore_tags = set(['Catalog','Clippings'])
|
||||
for (i, id) in enumerate(self.am):
|
||||
bm = Device.UserAnnotation(self.am[id][0],self.am[id][1])
|
||||
if bm.type == 'kindle_bookmark':
|
||||
mi = self.db.get_metadata(id, index_is_id=True)
|
||||
user_notes_soup = self.generate_annotation_html(bm.value)
|
||||
if mi.comments:
|
||||
a_offset = mi.comments.find('<div class="user_annotations">')
|
||||
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
|
||||
|
||||
if a_offset >= 0:
|
||||
mi.comments = mi.comments[:a_offset]
|
||||
if ad_offset >= 0:
|
||||
mi.comments = mi.comments[:ad_offset]
|
||||
if set(mi.tags).intersection(ignore_tags):
|
||||
continue
|
||||
if mi.comments:
|
||||
hrTag = Tag(user_notes_soup,'hr')
|
||||
hrTag['class'] = 'annotations_divider'
|
||||
user_notes_soup.insert(0,hrTag)
|
||||
|
||||
mi.comments += user_notes_soup.prettify()
|
||||
else:
|
||||
mi.comments = unicode(user_notes_soup.prettify())
|
||||
# Update library comments
|
||||
self.db.set_comment(id, mi.comments)
|
||||
|
||||
'''
|
||||
# Update 'read' tag except for Catalogs/Clippings
|
||||
if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD:
|
||||
if not set(mi.tags).intersection(ignore_tags):
|
||||
self.mark_book_as_read(id)
|
||||
'''
|
||||
|
||||
# Add bookmark file to id
|
||||
self.db.add_format_with_hooks(id, bm.value.bookmark_extension,
|
||||
bm.value.path, index_is_id=True)
|
||||
self.update_progress.emit(i)
|
||||
elif bm.type == 'kindle_clippings':
|
||||
# Find 'My Clippings' author=Kindle in database, or add
|
||||
last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
|
||||
mc_id = list(db.data.parse('title:"My Clippings"'))
|
||||
if mc_id:
|
||||
do_add_format(self.db, mc_id[0], 'TXT', bm.value['path'])
|
||||
mi = self.db.get_metadata(mc_id[0], index_is_id=True)
|
||||
mi.comments = last_update
|
||||
self.db.set_metadata(mc_id[0], mi)
|
||||
else:
|
||||
mi = MetaInformation('My Clippings', authors = ['Kindle'])
|
||||
mi.tags = ['Clippings']
|
||||
mi.comments = last_update
|
||||
self.db.add_books([bm.value['path']], ['txt'], [mi])
|
||||
|
||||
self.update_done.emit()
|
||||
self.done_callback(self.am.keys())
|
||||
|
||||
# }}}
|
||||
|
||||
if not job.result: return
|
||||
|
||||
@ -246,9 +137,25 @@ class FetchAnnotationsAction(InterfaceAction):
|
||||
_('User annotations generated from main library only'),
|
||||
show=True)
|
||||
db = self.gui.library_view.model().db
|
||||
device = self.gui.device_manager.device
|
||||
|
||||
self.__annotation_updater = Updater(self.gui, db, job.result,
|
||||
self.Dispatcher(self.gui.library_view.model().refresh_ids))
|
||||
self.__annotation_updater = Updater(self.gui, db, device, job.result,
|
||||
self.Dispatcher(self.annotations_updated))
|
||||
self.__annotation_updater.start()
|
||||
|
||||
def annotations_updated(self, ids, errors):
    '''
    Refresh the library view for ``ids`` and report any failures.

    ``errors`` maps a key to a traceback string. Integer keys are
    treated as book ids and replaced by the book's title in the report;
    any other key is shown as is. Nothing is displayed when ``errors``
    is empty.
    '''
    self.gui.library_view.model().refresh_ids(ids)
    if not errors:
        return
    db = self.gui.library_view.model().db
    details = []
    for id_, tb in errors.iteritems():
        if isinstance(id_, int):
            title = db.title(id_, index_is_id=True)
        else:
            title = id_
        # A blank entry separates consecutive reports
        details += [title, tb, '']
    error_dialog(self.gui, _('Some errors'),
            _('Could not fetch annotations for some books. Click '
                'show details to see which ones.'),
            det_msg='\n'.join(details), show=True)
|
||||
|
||||
|
||||
|
@ -1239,11 +1239,14 @@ class DeviceBooksModel(BooksModel): # {{{
|
||||
def paths(self, rows):
    '''
    Return the ``path`` of the book behind each of the given rows.

    Each element of ``rows`` must provide ``row()``; that value is
    translated through ``self.map`` to an index into ``self.db``.
    '''
    result = []
    for index in rows:
        book = self.db[self.map[index.row()]]
        result.append(book.path)
    return result
|
||||
|
||||
def paths_for_db_ids(self, db_ids):
|
||||
res = []
|
||||
def paths_for_db_ids(self, db_ids, as_map=False):
|
||||
res = defaultdict(list) if as_map else []
|
||||
for r,b in enumerate(self.db):
|
||||
if b.application_id in db_ids:
|
||||
res.append((r,b))
|
||||
if as_map:
|
||||
res[b.application_id].append(b)
|
||||
else:
|
||||
res.append((r,b))
|
||||
return res
|
||||
|
||||
def get_collections_with_ids(self):
|
||||
|
@ -242,6 +242,10 @@ Replace ``192.168.1.2`` with the local IP address of the computer running |app|.
|
||||
|
||||
If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the Stanza settings. Go to Info->Settings and increase the value of Download Timeout.
|
||||
|
||||
.. note::
|
||||
As of iOS version 5 Stanza no longer works on Apple devices. Alternatives to Stanza are discussed `here <http://www.mobileread.com/forums/showthread.php?t=152789>`_.
|
||||
|
||||
|
||||
Using iBooks
|
||||
**************
|
||||
|
||||
@ -251,7 +255,7 @@ Start the Safari browser and type in the IP address and port of the computer run
|
||||
|
||||
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
|
||||
|
||||
You wills ee a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
|
||||
You will see a list of books in Safari. Just click on the epub link for whichever book you want to read, and Safari will then prompt you to open it with iBooks.
|
||||
|
||||
|
||||
With the USB cable + iTunes
|
||||
|
Loading…
x
Reference in New Issue
Block a user