mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
9548696089
71
recipes/20minutes.recipe
Normal file
71
recipes/20minutes.recipe
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||||
|
'''
|
||||||
|
20minutes.fr
|
||||||
|
'''
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Minutes(BasicNewsRecipe):
    """Recipe for 20minutes.fr, driven by the site's per-section RSS feeds."""

    # --- Publication metadata ---
    title = '20 minutes'
    __author__ = 'calibre'
    publisher = '20minutes.fr'
    description = 'Actualités'
    category = 'Actualités, France, Monde'
    language = 'fr'
    encoding = 'cp1252'

    # --- Download options ---
    timefmt = ' [%d %b %Y]'
    use_embedded_content = False
    max_articles_per_feed = 15
    remove_empty_feeds = True
    no_stylesheets = True
    # NOTE(review): presumably read by the base recipe class - confirm it is
    # a recognised option.
    filterDuplicates = True

    # Styling applied to the elements kept from the article page.
    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .mna-details {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .mna-image {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .mna-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        '''

    # Only the article container is kept ...
    keep_only_tags = [{'id': 'mn-article'}]

    # ... everything after the body/signature block is dropped ...
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': ['mna-body', 'mna-signature']},
    }

    # ... and these elements are removed from what remains.
    remove_tags = [
        {'name': 'iframe'},
        {'name': 'div', 'attrs': {'class': ['mn-section-heading']}},
        {'name': 'a', 'attrs': {'href': ['#commentaires']}},
        {'name': 'div', 'attrs': {'class': ['mn-right']}},
        {'name': 'div', 'attrs': {'class': ['mna-box']}},
        {'name': 'div', 'attrs': {'class': ['mna-comment-call']}},
        {'name': 'div', 'attrs': {'class': ['mna-tools']}},
        {'name': 'div', 'attrs': {'class': ['mn-trilist']}},
    ]

    # (section title, feed URL)
    feeds = [
        ('France', 'http://www.20minutes.fr/rss/actu-france.xml'),
        ('International', 'http://www.20minutes.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.20minutes.fr/rss/hightech.xml'),
        ('Sciences', 'http://www.20minutes.fr/rss/sciences.xml'),
        ('Economie', 'http://www.20minutes.fr/rss/economie.xml'),
        ('Politique', 'http://www.20minutes.fr/rss/politique.xml'),
        (u'Médias', 'http://www.20minutes.fr/rss/media.xml'),
        ('Cinema', 'http://www.20minutes.fr/rss/cinema.xml'),
        ('People', 'http://www.20minutes.fr/rss/people.xml'),
        ('Culture', 'http://www.20minutes.fr/rss/culture.xml'),
        ('Sport', 'http://www.20minutes.fr/rss/sport.xml'),
        ('Paris', 'http://www.20minutes.fr/rss/paris.xml'),
        ('Lyon', 'http://www.20minutes.fr/rss/lyon.xml'),
        ('Toulouse', 'http://www.20minutes.fr/rss/toulouse.xml'),
    ]

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one.

        The soup is modified in place and returned.
        """
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup
|
@ -22,6 +22,14 @@ class CNN(BasicNewsRecipe):
|
|||||||
#match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
|
#match_regexps = [r'http://sportsillustrated.cnn.com/.*/[1-9].html']
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
.cnn_story_author, .cnn_stryathrtmp {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
.cnn_strycaptiontxt, .cnnArticleGalleryPhotoContainer {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
.cnn_strycbftrtxt, .cnnEditorialNote {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
.cnn_strycntntlft {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
'''
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
|
(re.compile(r'<!--\[if.*if\]-->', re.DOTALL), lambda m: ''),
|
||||||
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
|
(re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
|
||||||
@ -32,7 +40,12 @@ class CNN(BasicNewsRecipe):
|
|||||||
remove_tags = [
|
remove_tags = [
|
||||||
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
{'class':['cnn_strybtntools', 'cnn_strylftcntnt',
|
||||||
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
|
||||||
'cnn_strycntntrgt', 'hed_side', 'foot']},
|
'cnn_strycntntrgt', 'hed_side', 'foot', 'cnn_strylftcntnt cnn_strylftcexpbx']},
|
||||||
|
{'class':['cnn_html_media_title_new', 'cnn_html_media_title_new cnn_html_media_title_none',
|
||||||
|
'cnnArticleGalleryCaptionControlText', 'articleGalleryNavContainer']},
|
||||||
|
{'id':['articleGalleryNav00JumpPrev', 'articleGalleryNav00Prev',
|
||||||
|
'articleGalleryNav00Next', 'articleGalleryNav00JumpNext']},
|
||||||
|
{'style':['display:none']},
|
||||||
dict(id=['ie_column']),
|
dict(id=['ie_column']),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -58,3 +71,12 @@ class CNN(BasicNewsRecipe):
|
|||||||
ans = BasicNewsRecipe.get_article_url(self, article)
|
ans = BasicNewsRecipe.get_article_url(self, article)
|
||||||
return ans.partition('?')[0]
|
return ans.partition('?')[0]
|
||||||
|
|
||||||
|
def get_masthead_url(self):
    """Return the masthead image URL, or ``None`` if it cannot be opened.

    The URL is probed with a browser obtained from this recipe instance.
    The previous code called ``BasicNewsRecipe.get_browser()`` with no
    instance, which fails before the ``try`` block is even reached.
    """
    masthead = 'http://i.cdn.turner.com/cnn/.element/img/3.0/global/header/intl/hdr-globe-central.gif'
    br = self.get_browser()
    try:
        br.open(masthead)
    except Exception:
        # Best effort: an unreachable logo must not abort the download.
        self.log("\nCover unavailable")
        masthead = None
    return masthead
|
||||||
|
58
recipes/ekathemerini.recipe
Normal file
58
recipes/ekathemerini.recipe
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
|
||||||
|
|
||||||
|
class Ekathimerini(BasicNewsRecipe):
    """Recipe for ekathimerini (Kathimerini, English edition).

    The index is built by hand from the XML document at ``rss_url``: items
    are grouped by the text of their ``subcat`` element, and items whose
    sub-category is missing or empty go into a generic "News" feed.
    """

    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    encoding = 'windows-1253'
    conversion_options = {'linearize_tables': True}
    no_stylesheets = True
    delay = 1
    keep_only_tags = [dict(name='td', attrs={'class': 'news'})]

    # Single XML document from which every feed is derived.
    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        """Yield one article dict per ``item`` of *idx* belonging to *category*.

        :param idx: parsed index document (as produced in ``parse_index``).
        :param category: sub-category text to match; ``u''`` selects the
            items that have no ``subcat`` element or an empty one.
        :return: generator of dicts with the keys ``title``, ``url``,
            ``description`` and ``date``.
        """
        for article in idx.findAll('item'):
            cat_elem = article.find('subcat')
            cat = self.tag_to_string(cat_elem) if cat_elem else u''
            if cat != category:
                continue

            # The description is itself markup: extract its text, parse that
            # as HTML and flatten the result to plain text.
            desc_html = self.tag_to_string(article.find('description'))
            description = self.tag_to_string(BeautifulSoup(desc_html))

            yield {
                'title': self.tag_to_string(article.find('title')),
                'url': self.tag_to_string(article.find('link')),
                'description': description,
                'date': self.tag_to_string(article.find('pubdate')),
            }

    def parse_index(self):
        """Download ``rss_url`` and return a list of ``(title, articles)`` feeds.

        The first feed is "News" (items without a sub-category); it is
        followed by one feed per distinct sub-category, sorted by name.
        """
        idx_contents = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        cats = set(self.tag_to_string(subcat) for subcat in idx.findAll('subcat'))
        # An empty <subcat> maps to u'', which is already served by the
        # "News" feed below; without this the same articles would appear a
        # second time under a feed with a blank title.
        cats.discard(u'')

        feeds = [(u'News', list(self.find_articles(idx, u'')))]
        for cat in sorted(cats):
            feeds.append((cat.capitalize(), list(self.find_articles(idx, cat))))
        return feeds

    def print_version(self, url):
        """Rewrite an article URL by replacing its ``/4dcgi/`` prefix with
        ``/4Dcgi/4dcgi/``; other URLs are returned unchanged."""
        return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')
|
||||||
|
|
@ -33,7 +33,7 @@ class ElPais(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
|
keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','cuerpo_noticia','caja_despiece']})]
|
||||||
|
|
||||||
extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '
|
extra_css = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '
|
||||||
|
|
||||||
|
8
recipes/frandroid.recipe
Normal file
8
recipes/frandroid.recipe
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
class BasicUserRecipe1318572550(AutomaticNewsRecipe):
    """Feed-only recipe for FrAndroid; no custom parsing is defined here."""

    title = u'FrAndroid'

    # (feed title, feed URL)
    feeds = [
        (u'FrAndroid', u'http://feeds.feedburner.com/Frandroid'),
    ]

    # Options read by the base recipe class.
    auto_cleanup = True
    oldest_article = 2
    max_articles_per_feed = 100
|
8
recipes/googlemobileblog.recipe
Normal file
8
recipes/googlemobileblog.recipe
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
class BasicUserRecipe1318572445(AutomaticNewsRecipe):
    """Feed-only recipe for the Google Mobile Blog; no custom parsing here."""

    title = u'Google Mobile Blog'

    # (feed title, feed URL)
    feeds = [
        (u'Google Mobile Blog', u'http://googlemobile.blogspot.com/atom.xml'),
    ]

    # Options read by the base recipe class.
    auto_cleanup = True
    oldest_article = 7
    max_articles_per_feed = 100
|
50
recipes/hankyoreh.recipe
Normal file
50
recipes/hankyoreh.recipe
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||||
|
'''
|
||||||
|
Profile to download The Hankyoreh
|
||||||
|
'''
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
class Hankyoreh(BasicNewsRecipe):
    """Recipe for The Hankyoreh, driven by the hani.co.kr RSS feeds."""

    # --- Publication metadata ---
    title = u'Hankyoreh'
    __author__ = 'Seongkyoun Yoo'
    description = u'The Hankyoreh News articles'
    language = 'ko'

    # --- Download options ---
    oldest_article = 5
    max_articles_per_feed = 5
    recursions = 1
    no_stylesheets = True

    # --- Page clean-up rules ---
    keep_only_tags = [
        {'name': 'tr', 'attrs': {'height': ['60px']}},
        {'id': ['fontSzArea']},
    ]
    remove_tags = [
        {'target': '_blank'},
        {'name': 'td', 'attrs': {'style': ['padding: 10px 8px 5px 8px;']}},
        {'name': 'iframe', 'attrs': {'width': ['590']}},
    ]
    remove_tags_after = [
        {'target': '_top'},
    ]

    # (section title, feed URL)
    feeds = [
        ('All News', 'http://www.hani.co.kr/rss/'),
        ('Politics', 'http://www.hani.co.kr/rss/politics/'),
        ('Economy', 'http://www.hani.co.kr/rss/economy/'),
        ('Society', 'http://www.hani.co.kr/rss/society/'),
        ('International', 'http://www.hani.co.kr/rss/international/'),
        ('Culture', 'http://www.hani.co.kr/rss/culture/'),
        ('Sports', 'http://www.hani.co.kr/rss/sports/'),
        ('Science', 'http://www.hani.co.kr/rss/science/'),
        ('Opinion', 'http://www.hani.co.kr/rss/opinion/'),
        ('Cartoon', 'http://www.hani.co.kr/rss/cartoon/'),
        ('English Edition', 'http://www.hani.co.kr/rss/english_edition/'),
        ('Specialsection', 'http://www.hani.co.kr/rss/specialsection/'),
        ('Hanionly', 'http://www.hani.co.kr/rss/hanionly/'),
        ('Hkronly', 'http://www.hani.co.kr/rss/hkronly/'),
        ('Multihani', 'http://www.hani.co.kr/rss/multihani/'),
        ('Lead', 'http://www.hani.co.kr/rss/lead/'),
        ('Newsrank', 'http://www.hani.co.kr/rss/newsrank/'),
    ]
|
26
recipes/hankyoreh21.recipe
Normal file
26
recipes/hankyoreh21.recipe
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||||
|
'''
|
||||||
|
Profile to download The Hankyoreh
|
||||||
|
'''
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Hankyoreh21(BasicNewsRecipe):
    """Recipe for the Hankyoreh21 magazine, driven by its single RSS feed."""

    title = u'Hankyoreh21'
    language = 'ko'
    description = u'The Hankyoreh21 Magazine articles'
    __author__ = 'Seongkyoun Yoo'
    oldest_article = 20
    recursions = 1
    max_articles_per_feed = 120
    no_stylesheets = True
    remove_javascript = True

    # Keep the headline element and the article text container.
    keep_only_tags = [
        dict(name='font', attrs={'class': 't18bk'}),
        dict(id=['fontSzArea']),
    ]

    feeds = [
        # The URL previously ended with a stray space inside the literal.
        ('Hani21', 'http://h21.hani.co.kr/rss/'),
    ]
|
18
recipes/korben.recipe
Normal file
18
recipes/korben.recipe
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
class BasicUserRecipe1318619728(AutomaticNewsRecipe):
    """Feed-only recipe for the Korben blog, with a probed masthead image."""

    title = u'Korben'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Korben', u'http://feeds2.feedburner.com/KorbensBlog-UpgradeYourMind')]

    def get_masthead_url(self):
        """Return the masthead image URL, or ``None`` if it cannot be opened.

        The URL is probed with a browser obtained from this recipe instance.
        The previous code called ``BasicNewsRecipe.get_browser()`` with no
        instance, which fails before the ``try`` block is even reached.
        """
        masthead = 'http://korben.info/wp-content/themes/korben-steaw/hab/logo.png'
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: an unreachable logo must not abort the download.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
@ -1,36 +1,35 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
|
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
|
||||||
'''
|
'''
|
||||||
Profile to download KoreaHerald
|
Profile to download KoreaHerald
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class KoreaHerald(BasicNewsRecipe):
    """Recipe for The Korea Herald, driven by its per-section RSS feeds."""

    # --- Publication metadata ---
    title = u'KoreaHerald'
    __author__ = 'Seongkyoun Yoo'
    description = u'Korea Herald News articles'
    language = 'en'

    # --- Download options ---
    oldest_article = 15
    max_articles_per_feed = 15
    recursions = 3
    no_stylesheets = True

    # --- Page clean-up rules ---
    keep_only_tags = [
        {'id': ['contentLeft', '_article']},
    ]
    remove_tags = [
        {'name': 'iframe'},
        {'name': 'div', 'attrs': {'class': ['left', 'htit2', 'navigation', 'banner_txt', 'banner_img']}},
        {'name': 'ul', 'attrs': {'class': ['link_icon', 'flow_icon', 'detailTextAD110113']}},
    ]

    # (section title, feed URL)
    feeds = [
        ('National', 'http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business', 'http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style', 'http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment', 'http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports', 'http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion', 'http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe', 'http://www.koreaherald.com/rss/021000000000.xml'),
    ]
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||||
title = 'Kansascity Star'
|
title = 'Kansas City Star'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
__author__ = 'TonytheBookworm'
|
__author__ = 'TonytheBookworm'
|
||||||
description = 'www.kansascity.com feed'
|
description = 'www.kansascity.com feed'
|
||||||
|
37
recipes/kyungyhang
Normal file
37
recipes/kyungyhang
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||||
|
'''
|
||||||
|
Profile to download The Kyungyhang
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Kyungyhang(BasicNewsRecipe):
    """Recipe for The Kyungyhang Shinmun, driven by its all-news RSS feed."""

    # --- Publication metadata ---
    title = u'Kyungyhang'
    __author__ = 'Seongkyoun Yoo'
    description = u'The Kyungyhang Shinmun articles'
    language = 'ko'

    # --- Download options ---
    oldest_article = 20
    max_articles_per_feed = 20
    recursions = 2
    no_stylesheets = True
    remove_javascript = True

    # --- Page clean-up rules ---
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['article_title_wrap']}},
        {'name': 'div', 'attrs': {'class': ['article_txt']}},
    ]

    # The id matchers below are sets, exactly as in the original recipe.
    remove_tags_after = {'id': {'sub_bottom'}}

    remove_tags = [
        {'name': 'iframe'},
        {'id': {'TdHot'}},
        {'name': 'div', 'attrs': {'class': ['btn_list', 'bline', 'linebottom', 'bestArticle']}},
        {'name': 'dl', 'attrs': {'class': ['CL']}},
        {'name': 'ul', 'attrs': {'class': ['tab']}},
    ]

    # (section title, feed URL)
    feeds = [
        ('All News', 'http://www.khan.co.kr/rss/rssdata/total_news.xml'),
    ]
|
@ -1,32 +1,37 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
|
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
|
||||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version'
|
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
http://www.repubblica.it/
|
http://www.repubblica.it/
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class LaRepubblica(BasicNewsRecipe):
|
class LaRepubblica(BasicNewsRecipe):
|
||||||
title = 'La Repubblica'
|
title = 'La Repubblica'
|
||||||
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
|
__author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
|
||||||
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
|
description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
|
||||||
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
|
masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
|
||||||
publisher = 'Gruppo editoriale L\'Espresso'
|
publisher = 'Gruppo editoriale L\'Espresso'
|
||||||
category = 'News, politics, culture, economy, general interest'
|
category = 'News, politics, culture, economy, general interest'
|
||||||
language = 'it'
|
language = 'it'
|
||||||
timefmt = '[%a, %d %b, %Y]'
|
timefmt = '[%a, %d %b, %Y]'
|
||||||
oldest_article = 5
|
oldest_article = 5
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
#recursion = 10
|
no_stylesheets = True
|
||||||
no_stylesheets = True
|
publication_type = 'newspaper'
|
||||||
extra_css = """
|
articles_are_obfuscated = True
|
||||||
img{display: block}
|
temp_files = []
|
||||||
"""
|
extra_css = """
|
||||||
|
img{display: block}
|
||||||
|
"""
|
||||||
|
|
||||||
|
remove_attributes = ['width','height','lang','xmlns:og','xmlns:fb']
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
|
(re.compile(r'.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: '<head>'),
|
||||||
@ -35,11 +40,28 @@ class LaRepubblica(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
    """Return the URL for *article* with any query string removed.

    The URL chosen by ``BasicNewsRecipe.get_article_url`` is used unless it
    does not contain ``.repubblica.it/``; in that case the entry's ``id``
    (or, failing that, ``guid``) is preferred when it is non-empty.

    :param article: feed entry; only its ``get`` method is used here.
    :return: the URL up to the first ``?``, or the falsy value unchanged
        when no URL could be determined.
    """
    link = BasicNewsRecipe.get_article_url(self, article)
    if link and '.repubblica.it/' not in link:
        fallback = article.get('id', article.get('guid', None))
        if fallback:
            link = fallback
    if not link:
        # Nothing to strip; previously this path raised AttributeError.
        return link
    # partition, not rpartition: rpartition('?')[0] is '' when the URL has
    # no '?', which silently turned every query-less link into an empty one.
    return link.partition('?')[0]
|
||||||
|
|
||||||
|
def get_obfuscated_article(self, url):
    """Download *url* into a temporary file and return that file's path.

    The download is attempted up to 10 times. The temporary file object is
    also appended to ``self.temp_files``.

    :param url: address of the article page.
    :return: name of the temporary ``_fa.html`` file holding the page.
    :raises Exception: the error from the last attempt when all 10 fail
        (previously this surfaced as an unbound ``html`` variable).
    """
    max_attempts = 10
    for attempt in range(1, max_attempts + 1):
        try:
            html = self.browser.open(url).read()
            break
        except Exception:
            if attempt == max_attempts:
                # Out of retries: report the real download failure.
                raise
            self.log("Retrying download...")

    tmp = PersistentTemporaryFile('_fa.html')
    self.temp_files.append(tmp)
    tmp.write(html)
    tmp.close()
    return tmp.name
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(attrs={'class':'articolo'}),
|
dict(attrs={'class':'articolo'}),
|
||||||
dict(attrs={'class':'body-text'}),
|
dict(attrs={'class':'body-text'}),
|
||||||
@ -49,7 +71,7 @@ class LaRepubblica(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','meta']),
|
dict(name=['object','link','meta','iframe','embed']),
|
||||||
dict(name='span',attrs={'class':'linkindice'}),
|
dict(name='span',attrs={'class':'linkindice'}),
|
||||||
dict(name='div', attrs={'class':'bottom-mobile'}),
|
dict(name='div', attrs={'class':'bottom-mobile'}),
|
||||||
dict(name='div', attrs={'id':['rssdiv','blocco']}),
|
dict(name='div', attrs={'id':['rssdiv','blocco']}),
|
||||||
@ -80,3 +102,11 @@ class LaRepubblica(BasicNewsRecipe):
|
|||||||
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
|
(u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Normalise the parsed article and return the same soup object.

    ``hgroup``, ``deresponsabilizzazione`` and ``per`` elements are renamed
    to ``div`` with their attributes cleared, then the ``style`` attribute
    is deleted from every tag that carries one.
    """
    renamed = ['hgroup', 'deresponsabilizzazione', 'per']
    for tag in soup.findAll(renamed):
        tag.name = 'div'
        tag.attrs = []

    for tag in soup.findAll(style=True):
        del tag['style']

    return soup
|
||||||
|
|
||||||
|
76
recipes/lepoint.recipe
Normal file
76
recipes/lepoint.recipe
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||||
|
'''
|
||||||
|
LePoint.fr
|
||||||
|
'''
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
class lepoint(BasicNewsRecipe):
    """Recipe for LePoint.fr, driven by the site's per-section RSS feeds."""

    title = 'Le Point'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'LePoint.fr'
    category = 'news, France, world'
    language = 'fr'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    # NOTE(review): presumably read by the base recipe class - confirm it is
    # a recognised option.
    filterDuplicates = True

    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
        .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        '''

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['entete_chroniqueur']}),
        dict(name='div', attrs={'class': ['col_article']}),
        dict(name='div', attrs={'class': ['signature_article']}),
        dict(name='div', attrs={'class': ['util_font util_article']}),
        dict(name='div', attrs={'class': ['util_article bottom']}),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': ['page_article']})]

    remove_tags_after = dict(name='div', attrs={'class': ['util_article bottom']})

    # (section title, feed URL)
    feeds = [
        (u'À la une', 'http://www.lepoint.fr/rss.xml'),
        ('International', 'http://www.lepoint.fr/monde/rss.xml'),
        ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
        ('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        # Label was misspelled "Socièté".
        (u'Société', 'http://www.lepoint.fr/societe/rss.xml'),
        ('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
        (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
        ('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
        (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
        ('Sport', 'http://www.lepoint.fr/sport/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every tag; return soup."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """Return the masthead image URL, or ``None`` if it cannot be opened.

        The URL is probed with a browser obtained from this recipe instance.
        The previous code called ``BasicNewsRecipe.get_browser()`` with no
        instance, which fails before the ``try`` block is even reached.
        """
        masthead = 'http://www.lepoint.fr/images/commun/logo.png'
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: an unreachable logo must not abort the download.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
74
recipes/lexpress.recipe
Normal file
74
recipes/lexpress.recipe
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||||
|
'''
|
||||||
|
Lexpress.fr
|
||||||
|
'''
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
# NOTE(review): the class name matches the LePoint recipe although this is the
# L'Express recipe; it is left unchanged because it is the block's public name.
class lepoint(BasicNewsRecipe):
    """Recipe for LExpress.fr, driven by the site's per-section RSS feeds."""

    title = 'L\'express'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'cp1252'
    publisher = 'LExpress.fr'
    category = 'Actualité, France, Monde'
    language = 'fr'

    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    # NOTE(review): presumably read by the base recipe class - confirm it is
    # a recognised option.
    filterDuplicates = True

    # The last rule previously read "font-weiht", an unknown CSS property.
    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .current_parent, p.heure, .ouverture {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        #contenu-article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        .entete { font-weight:bold;}
        '''

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['barre-outil-fb']}),
        dict(name='div', attrs={'class': ['barre-outils']}),
        dict(id='bloc-sommaire'),
        dict(id='footer-article'),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': ['bloc-article']})]

    remove_tags_after = dict(id='content-article')

    # (section title, feed URL)
    feeds = [
        (u'À la une', 'http://www.lexpress.fr/rss/alaune.xml'),
        ('International', 'http://www.lexpress.fr/rss/monde.xml'),
        ('Tech/Web', 'http://www.lexpress.fr/rss/high-tech.xml'),
        (u'Sciences/Santé', 'http://www.lexpress.fr/rss/science-et-sante.xml'),
        # Label was misspelled "Envronnement".
        (u'Environnement', 'http://www.lexpress.fr/rss/environnement.xml'),
        # NOTE(review): this entry pointed at www.lepoint.fr (another site's
        # feed). The URL below follows the pattern of the sibling feeds -
        # confirm that it resolves.
        ('Economie', 'http://www.lexpress.fr/rss/economie.xml'),
        # Label was misspelled "Socièté".
        (u'Société', 'http://www.lexpress.fr/rss/societe.xml'),
        ('Politique', 'http://www.lexpress.fr/rss/politique.xml'),
        (u'Médias', 'http://www.lexpress.fr/rss/medias.xml'),
        ('Culture', 'http://www.lexpress.fr/rss/culture.xml'),
        ('Sport', 'http://www.lexpress.fr/rss/sport.xml'),
    ]

    def preprocess_html(self, soup):
        """Delete the inline ``style`` attribute from every tag; return soup."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """Return the masthead image URL, or ``None`` if it cannot be opened.

        The URL is probed with a browser obtained from this recipe instance.
        The previous code called ``BasicNewsRecipe.get_browser()`` with no
        instance, which fails before the ``try`` block is even reached.
        """
        masthead = 'http://static.lexpress.fr/imgstat/logo_lexpress.gif'
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best effort: an unreachable logo must not abort the download.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
@ -9,39 +9,72 @@ liberation.fr
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Liberation(BasicNewsRecipe):
|
class Liberation(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'Liberation'
|
title = u'Liberation'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'calibre'
|
||||||
description = 'News from France'
|
description = 'Actualités'
|
||||||
language = 'fr'
|
category = 'Actualités, France, Monde'
|
||||||
|
language = 'fr'
|
||||||
|
|
||||||
oldest_article = 7
|
use_embedded_content = False
|
||||||
max_articles_per_feed = 100
|
timefmt = ' [%d %b %Y]'
|
||||||
no_stylesheets = True
|
max_articles_per_feed = 15
|
||||||
use_embedded_content = False
|
no_stylesheets = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
filterDuplicates = True
|
||||||
|
|
||||||
html2lrf_options = ['--base-font-size', '10']
|
extra_css = '''
|
||||||
|
h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
h4, h5, h2.rubrique {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
.ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
.mna-body, .entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
'''
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1')
|
dict(name='div', attrs={'class':'article'})
|
||||||
#,dict(name='div', attrs={'class':'object-content text text-item'})
|
,dict(name='div', attrs={'class':'text-article m-bot-s1'})
|
||||||
,dict(name='div', attrs={'class':'article'})
|
,dict(name='div', attrs={'class':'entry'})
|
||||||
#,dict(name='div', attrs={'class':'articleContent'})
|
,dict(name='div', attrs={'class':'col_contenu'})
|
||||||
,dict(name='div', attrs={'class':'entry'})
|
]
|
||||||
]
|
|
||||||
remove_tags_after = [ dict(name='div',attrs={'class':'toolbox extra_toolbox'}) ]
|
remove_tags_after = [
|
||||||
|
dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']})
|
||||||
|
,dict(name='p',attrs={'class':['chapo']})
|
||||||
|
,dict(id='_twitter_facebook')
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='p', attrs={'class':'clear'})
|
dict(name='iframe')
|
||||||
,dict(name='ul', attrs={'class':'floatLeft clear'})
|
,dict(name='a', attrs={'class':'lnk-comments'})
|
||||||
,dict(name='div', attrs={'class':'clear floatRight'})
|
,dict(name='div', attrs={'class':'toolbox'})
|
||||||
,dict(name='object')
|
,dict(name='ul', attrs={'class':'share-box'})
|
||||||
,dict(name='div', attrs={'class':'toolbox'})
|
,dict(name='ul', attrs={'class':'tool-box'})
|
||||||
,dict(name='div', attrs={'class':'cartridge cartridge-basic-bubble cat-zoneabo'})
|
,dict(name='ul', attrs={'class':'rub'})
|
||||||
#,dict(name='div', attrs={'class':'clear block block-call-items'})
|
,dict(name='p',attrs={'class':['chapo']})
|
||||||
,dict(name='div', attrs={'class':'block-content'})
|
,dict(name='p',attrs={'class':['tag']})
|
||||||
|
,dict(name='div',attrs={'class':['blokLies']})
|
||||||
|
,dict(name='div',attrs={'class':['alire']})
|
||||||
|
,dict(id='_twitter_facebook')
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'La une', u'http://www.liberation.fr/rss/laune')
|
(u'La une', u'http://rss.liberation.fr/rss/9/')
|
||||||
,(u'Monde' , u'http://www.liberation.fr/rss/monde')
|
,(u'Monde' , u'http://www.liberation.fr/rss/10/')
|
||||||
,(u'Sports', u'http://www.liberation.fr/rss/sports')
|
,(u'Économie', u'http://www.liberation.fr/rss/13/')
|
||||||
|
,(u'Politiques', u'http://www.liberation.fr/rss/11/')
|
||||||
|
,(u'Société', u'http://www.liberation.fr/rss/12/')
|
||||||
|
,(u'Cinéma', u'http://www.liberation.fr/rss/58/')
|
||||||
|
,(u'Écran', u'http://www.liberation.fr/rss/53/')
|
||||||
|
,(u'Sports', u'http://www.liberation.fr/rss/12/')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_masthead_url(self):
|
||||||
|
masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png'
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
try:
|
||||||
|
br.open(masthead)
|
||||||
|
except:
|
||||||
|
self.log("\nCover unavailable")
|
||||||
|
masthead = None
|
||||||
|
return masthead
|
||||||
|
@ -22,7 +22,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
|
|||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
delay = 1
|
delay = 1
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_md_1.jpg')
|
cover_url = strftime('http://www.lostiempos.com/media_recortes/%Y/%m/%d/portada_gd_1.jpg')
|
||||||
masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
|
masthead_url = 'http://www.lostiempos.com/img_stat/logo_tiempos_sin_beta.jpg'
|
||||||
extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
|
extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
|
||||||
img{margin-bottom: 0.4em}
|
img{margin-bottom: 0.4em}
|
||||||
|
@ -4,26 +4,27 @@ __copyright__ = '2010-2011, Eddie Lau'
|
|||||||
# Region - Hong Kong, Vancouver, Toronto
|
# Region - Hong Kong, Vancouver, Toronto
|
||||||
__Region__ = 'Hong Kong'
|
__Region__ = 'Hong Kong'
|
||||||
# Users of Kindle 3 with limited system-level CJK support
|
# Users of Kindle 3 with limited system-level CJK support
|
||||||
# please replace the following "True" with "False".
|
# please replace the following "True" with "False". (Default: True)
|
||||||
__MakePeriodical__ = True
|
__MakePeriodical__ = True
|
||||||
# Turn below to True if your device supports display of CJK titles
|
# Turn below to True if your device supports display of CJK titles (Default: False)
|
||||||
__UseChineseTitle__ = False
|
__UseChineseTitle__ = False
|
||||||
# Set it to False if you want to skip images
|
# Set it to False if you want to skip images (Default: True)
|
||||||
__KeepImages__ = True
|
__KeepImages__ = True
|
||||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
|
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
|
||||||
__UseLife__ = True
|
__UseLife__ = True
|
||||||
# (HK only) It is to disable the column section which is now a premium content
|
# (HK only) Turn below to True if you wish to include premium content (Default: False)
|
||||||
__InclCols__ = False
|
__InclPremium__ = False
|
||||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
|
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
|
||||||
__ParsePFF__ = False
|
__ParsePFF__ = True
|
||||||
# (HK only) Turn below to True if you wish hi-res images
|
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
||||||
__HiResImg__ = False
|
__HiResImg__ = False
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
||||||
2011/10/04: option to get hi-res photos for the articles
|
2011/10/04: option to get hi-res photos for the articles
|
||||||
2011/09/21: fetching "column" section is made optional.
|
2011/09/21: fetching "column" section is made optional.
|
||||||
2011/09/18: parse "column" section stuff from source text file directly.
|
2011/09/18: parse "column" section stuff from source text file directly.
|
||||||
2011/09/07: disable "column" section as it is no longer offered free.
|
2011/09/07: disable "column" section as it is no longer offered free.
|
||||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||||
@ -72,7 +73,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
dict(attrs={'class':['content']}), # for content from txt
|
dict(attrs={'class':['content']}), # for content from txt
|
||||||
dict(attrs={'class':['photo']}),
|
dict(attrs={'class':['photo']}),
|
||||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
|
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
|
||||||
dict(attrs={'class':['images']}) # for images from txt
|
dict(attrs={'class':['images']}) # for images from txt
|
||||||
]
|
]
|
||||||
if __KeepImages__:
|
if __KeepImages__:
|
||||||
@ -208,18 +209,21 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||||
]:
|
]:
|
||||||
articles = self.parse_section2(url, keystr)
|
if __InclPremium__ == True:
|
||||||
|
articles = self.parse_section2_txt(url, keystr)
|
||||||
|
else:
|
||||||
|
articles = self.parse_section2(url, keystr)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
|
|
||||||
if __InclCols__ == True:
|
if __InclPremium__ == True:
|
||||||
# parse column section articles directly from .txt files
|
# parse column section articles directly from .txt files
|
||||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||||
]:
|
]:
|
||||||
articles = self.parse_section2_txt(url, keystr)
|
articles = self.parse_section2_txt(url, keystr)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
|
|
||||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||||
articles = self.parse_section(url)
|
articles = self.parse_section(url)
|
||||||
@ -253,10 +257,10 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||||
|
|
||||||
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
|
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
|
||||||
articles = self.parse_section2(url, keystr)
|
articles = self.parse_section2_txt(url, keystr)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
|
|
||||||
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||||
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||||
# articles = self.parse_section(url)
|
# articles = self.parse_section(url)
|
||||||
@ -270,18 +274,18 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||||
]:
|
]:
|
||||||
articles = self.parse_section2(url, keystr)
|
articles = self.parse_section2_txt(url, keystr)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
|
|
||||||
if __InclCols__ == True:
|
if __InclPremium__ == True:
|
||||||
# parse column section articles directly from .txt files
|
# parse column section articles directly from .txt files
|
||||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||||
]:
|
]:
|
||||||
articles = self.parse_section2_txt(url, keystr)
|
articles = self.parse_section2_txt(url, keystr)
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((title, articles))
|
feeds.append((title, articles))
|
||||||
|
|
||||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||||
articles = self.parse_section(url)
|
articles = self.parse_section(url)
|
||||||
@ -333,7 +337,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||||
# replace the url to the print-friendly version
|
# replace the url to the print-friendly version
|
||||||
if __ParsePFF__ == True:
|
if __ParsePFF__ == True:
|
||||||
if url.rfind('Redirect') <> -1:
|
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
|
||||||
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
|
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
|
||||||
url = re.sub('%2F.*%2F', '/', url)
|
url = re.sub('%2F.*%2F', '/', url)
|
||||||
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
|
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
|
||||||
@ -349,6 +353,8 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
# parse from life.mingpao.com
|
# parse from life.mingpao.com
|
||||||
def parse_section2(self, url, keystr):
|
def parse_section2(self, url, keystr):
|
||||||
|
br = mechanize.Browser()
|
||||||
|
br.set_handle_redirect(False)
|
||||||
self.get_fetchdate()
|
self.get_fetchdate()
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
a = soup.findAll('a', href=True)
|
a = soup.findAll('a', href=True)
|
||||||
@ -359,9 +365,13 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
title = self.tag_to_string(i)
|
title = self.tag_to_string(i)
|
||||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
try:
|
||||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
br.open_novisit(url)
|
||||||
included_urls.append(url)
|
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||||
|
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||||
|
included_urls.append(url)
|
||||||
|
except:
|
||||||
|
print 'skipping a premium article'
|
||||||
current_articles.reverse()
|
current_articles.reverse()
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
@ -382,7 +392,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
included_urls.append(url)
|
included_urls.append(url)
|
||||||
current_articles.reverse()
|
current_articles.reverse()
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
# parse from www.mingpaovan.com
|
# parse from www.mingpaovan.com
|
||||||
def parse_section3(self, url, baseUrl):
|
def parse_section3(self, url, baseUrl):
|
||||||
self.get_fetchdate()
|
self.get_fetchdate()
|
||||||
@ -470,23 +480,23 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
|
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
|
||||||
if __HiResImg__ == True:
|
if __HiResImg__ == True:
|
||||||
# TODO: add a _ in front of an image url
|
# TODO: add a _ in front of an image url
|
||||||
if url.rfind('news.mingpao.com') > -1:
|
if url.rfind('news.mingpao.com') > -1:
|
||||||
imglist = re.findall('src="?.*?jpg"', raw_html)
|
imglist = re.findall('src="?.*?jpg"', raw_html)
|
||||||
br = mechanize.Browser()
|
br = mechanize.Browser()
|
||||||
br.set_handle_redirect(False)
|
br.set_handle_redirect(False)
|
||||||
for img in imglist:
|
for img in imglist:
|
||||||
gifimg = img.replace('jpg"', 'gif"')
|
gifimg = img.replace('jpg"', 'gif"')
|
||||||
try:
|
try:
|
||||||
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
||||||
raw_html = raw_html.replace(img, gifimg)
|
raw_html = raw_html.replace(img, gifimg)
|
||||||
except:
|
except:
|
||||||
# find the location of the first _
|
# find the location of the first _
|
||||||
pos = img.find('_')
|
pos = img.find('_')
|
||||||
if pos > -1:
|
if pos > -1:
|
||||||
# if found, insert _ after the first _
|
# if found, insert _ after the first _
|
||||||
newimg = img[0:pos] + '_' + img[pos:]
|
newimg = img[0:pos] + '_' + img[pos:]
|
||||||
raw_html = raw_html.replace(img, newimg)
|
raw_html = raw_html.replace(img, newimg)
|
||||||
else:
|
else:
|
||||||
# if not found, insert _ after "
|
# if not found, insert _ after "
|
||||||
raw_html = raw_html.replace(img[1:], '"_' + img[1:])
|
raw_html = raw_html.replace(img[1:], '"_' + img[1:])
|
||||||
elif url.rfind('life.mingpao.com') > -1:
|
elif url.rfind('life.mingpao.com') > -1:
|
||||||
@ -510,7 +520,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
pos = img.rfind('/')
|
pos = img.rfind('/')
|
||||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||||
#print 'newimg: ', newimg
|
#print 'newimg: ', newimg
|
||||||
raw_html = raw_html.replace(img, newimg)
|
raw_html = raw_html.replace(img, newimg)
|
||||||
if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
|
if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
|
||||||
return raw_html
|
return raw_html
|
||||||
else:
|
else:
|
||||||
@ -549,10 +559,11 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
photo = photo.replace('class="photo"', '')
|
photo = photo.replace('class="photo"', '')
|
||||||
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
|
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
|
||||||
return new_raw_html + '</body></html>'
|
return new_raw_html + '</body></html>'
|
||||||
else:
|
else:
|
||||||
# .txt based file
|
# .txt based file
|
||||||
splitter = re.compile(r'\n') # Matches newline characters
|
splitter = re.compile(r'\n') # Matches newline characters
|
||||||
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
||||||
|
next_is_mov_link = False
|
||||||
next_is_img_txt = False
|
next_is_img_txt = False
|
||||||
title_started = False
|
title_started = False
|
||||||
met_article_start_char = False
|
met_article_start_char = False
|
||||||
@ -561,24 +572,35 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
met_article_start_char = True
|
met_article_start_char = True
|
||||||
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||||
else:
|
else:
|
||||||
if next_is_img_txt == False:
|
if next_is_img_txt == False and next_is_mov_link == False:
|
||||||
if item.startswith('='):
|
item = item.strip()
|
||||||
|
if item.startswith("=@"):
|
||||||
|
next_is_mov_link = True
|
||||||
|
elif item.startswith("=?"):
|
||||||
|
next_is_img_txt = True
|
||||||
|
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
|
||||||
|
elif item.startswith('='):
|
||||||
next_is_img_txt = True
|
next_is_img_txt = True
|
||||||
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
|
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
|
||||||
else:
|
else:
|
||||||
if met_article_start_char == False:
|
if item <> '':
|
||||||
if title_started == False:
|
if next_is_img_txt == False and met_article_start_char == False:
|
||||||
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
if title_started == False:
|
||||||
title_started = True
|
#print 'Title started at ', item
|
||||||
|
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
||||||
|
title_started = True
|
||||||
|
else:
|
||||||
|
new_raw_html = new_raw_html + item + '\n'
|
||||||
else:
|
else:
|
||||||
new_raw_html = new_raw_html + item + '\n'
|
new_raw_html = new_raw_html + item + '<p>\n'
|
||||||
else:
|
|
||||||
new_raw_html = new_raw_html + item + '<p>\n'
|
|
||||||
else:
|
else:
|
||||||
next_is_img_txt = False
|
if next_is_mov_link == True:
|
||||||
new_raw_html = new_raw_html + item + '\n'
|
next_is_mov_link = False
|
||||||
|
else:
|
||||||
|
next_is_img_txt = False
|
||||||
|
new_raw_html = new_raw_html + item + '\n'
|
||||||
return new_raw_html + '</div></body></html>'
|
return new_raw_html + '</div></body></html>'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
@ -587,7 +609,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(stype=True):
|
for item in soup.findAll(stype=True):
|
||||||
del item['absmiddle']
|
del item['absmiddle']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def create_opf(self, feeds, dir=None):
|
def create_opf(self, feeds, dir=None):
|
||||||
if dir is None:
|
if dir is None:
|
||||||
dir = self.output_dir
|
dir = self.output_dir
|
||||||
@ -678,7 +700,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
if po is None:
|
if po is None:
|
||||||
self.play_order_counter += 1
|
self.play_order_counter += 1
|
||||||
po = self.play_order_counter
|
po = self.play_order_counter
|
||||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
|
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||||
play_order=po, author=auth, description=desc)
|
play_order=po, author=auth, description=desc)
|
||||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||||
for sp in a.sub_pages:
|
for sp in a.sub_pages:
|
||||||
|
18
recipes/omgubuntu.recipe
Normal file
18
recipes/omgubuntu.recipe
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
class BasicUserRecipe1318619832(AutomaticNewsRecipe):
|
||||||
|
title = u'OmgUbuntu'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
auto_cleanup = True
|
||||||
|
|
||||||
|
feeds = [(u'Omg Ubuntu', u'http://feeds.feedburner.com/d0od')]
|
||||||
|
|
||||||
|
def get_masthead_url(self):
|
||||||
|
masthead = 'http://cdn.omgubuntu.co.uk/wp-content/themes/omgubuntu/images/logo.png'
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
try:
|
||||||
|
br.open(masthead)
|
||||||
|
except:
|
||||||
|
self.log("\nCover unavailable")
|
||||||
|
masthead = None
|
||||||
|
return masthead
|
47
recipes/phoronix.recipe
Normal file
47
recipes/phoronix.recipe
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Fetch phoronix.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class cdnet(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = 'Phoronix'
|
||||||
|
__author__ = 'calibre'
|
||||||
|
description = 'Actualités Phoronix'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
publisher = 'Phoronix.com'
|
||||||
|
category = 'news, IT, linux'
|
||||||
|
language = 'en'
|
||||||
|
|
||||||
|
use_embedded_content = False
|
||||||
|
timefmt = ' [%d %b %Y]'
|
||||||
|
max_articles_per_feed = 25
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
filterDuplicates = True
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
h2 {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
.KonaBody {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
'''
|
||||||
|
|
||||||
|
remove_tags = []
|
||||||
|
|
||||||
|
remove_tags_before = dict(id='phxcms_content_phx')
|
||||||
|
remove_tags_after = dict(name='div', attrs={'class':'KonaBody'})
|
||||||
|
|
||||||
|
feeds = [('Phoronix', 'http://feeds.feedburner.com/Phoronix')]
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
return soup
|
||||||
|
|
@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
class USAToday(BasicNewsRecipe):
|
class USAToday(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'USA Today'
|
title = 'USA Today'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'calibre'
|
||||||
oldest_article = 1
|
description = 'newspaper'
|
||||||
publication_type = 'newspaper'
|
encoding = 'utf-8'
|
||||||
timefmt = ''
|
publisher = 'usatoday.com'
|
||||||
max_articles_per_feed = 20
|
category = 'news, usa'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
no_stylesheets = True
|
|
||||||
extra_css = '.headline {text-align: left;}\n \
|
use_embedded_content = False
|
||||||
.byline {font-family: monospace; \
|
timefmt = ' [%d %b %Y]'
|
||||||
text-align: left; \
|
max_articles_per_feed = 15
|
||||||
margin-bottom: 1em;}\n \
|
no_stylesheets = True
|
||||||
.image {text-align: center;}\n \
|
remove_empty_feeds = True
|
||||||
.caption {text-align: center; \
|
filterDuplicates = True
|
||||||
font-size: smaller; \
|
|
||||||
font-style: italic}\n \
|
extra_css = '''
|
||||||
.credit {text-align: right; \
|
h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
|
||||||
margin-bottom: 0em; \
|
#post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
|
||||||
font-size: smaller;}\n \
|
#post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
|
||||||
.articleBody {text-align: left;}\n '
|
'''
|
||||||
#simultaneous_downloads = 1
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
||||||
('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
|
('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
|
||||||
@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe):
|
|||||||
('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
|
('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
|
||||||
('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
|
('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
|
||||||
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
||||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
|
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(attrs={'class':'story'})]
|
keep_only_tags = [dict(attrs={'class':'story'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'class':[
|
dict(attrs={'class':[
|
||||||
'share',
|
'share',
|
||||||
'reprints',
|
'reprints',
|
||||||
'inline-h3',
|
'inline-h3',
|
||||||
'info-extras',
|
'info-extras rounded',
|
||||||
|
'inset',
|
||||||
'ppy-outer',
|
'ppy-outer',
|
||||||
'ppy-caption',
|
'ppy-caption',
|
||||||
'comments',
|
'comments',
|
||||||
@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe):
|
|||||||
'tags',
|
'tags',
|
||||||
'bottom-tools',
|
'bottom-tools',
|
||||||
'sponsoredlinks',
|
'sponsoredlinks',
|
||||||
|
'corrections'
|
||||||
]}),
|
]}),
|
||||||
|
dict(name='ul', attrs={'class':'inside-copy'}),
|
||||||
dict(id=['pluck']),
|
dict(id=['pluck']),
|
||||||
]
|
dict(id=['updated']),
|
||||||
|
dict(id=['post-date-updated'])
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_masthead_url(self):
|
def get_masthead_url(self):
|
||||||
|
68
recipes/zdnet.fr.recipe
Normal file
68
recipes/zdnet.fr.recipe
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011 Aurélien Chabot <contact@aurelienchabot.fr>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Fetch zdnet.fr
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
class zdnet(BasicNewsRecipe):
    """Recipe that builds a periodical from the ZDNet.fr news RSS feeds."""

    # Periodical metadata
    title = 'ZDNet.fr'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'ZDNet.fr'
    category = 'Actualité, Informatique, IT'
    language = 'fr'

    # Download behaviour
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    filterDuplicates = True

    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .contentmetadata p {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
        '''

    # Page furniture (sharing widgets, audio player, toolbars) to drop
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['toolbox']}),
        dict(name='div', attrs={'class':['clear clearfix']}),
        dict(id='emailtoafriend'),
        dict(id='storyaudio'),
        dict(id='fbtwContainer'),
        dict(name='h5')
    ]

    # Keep only what lies between the left column and the article content
    remove_tags_before = dict(id='leftcol')
    remove_tags_after = dict(id='content')

    feeds = [
        ('Informatique', 'http://www.zdnet.fr/feeds/rss/actualites/informatique/'),
        ('Internet', 'http://www.zdnet.fr/feeds/rss/actualites/internet/'),
        ('Telecom', 'http://www.zdnet.fr/feeds/rss/actualites/telecoms/')
    ]

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute from *soup* and return it."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """
        Return the URL of the site logo, or None when it cannot be fetched.

        The URL is opened once as a reachability check; any ordinary
        failure is logged and reported to the caller as None.
        """
        masthead = 'http://www.zdnet.fr/images/base/logo.png'
        # Called on the instance so it works whether the base class exposes
        # get_browser as an instance method or as a class method.
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Deliberately broad: a missing logo must never abort the
            # download. Unlike a bare except, this lets KeyboardInterrupt
            # and SystemExit propagate.
            self.log("\nCover unavailable")
            masthead = None
        return masthead
|
@ -1,7 +1,7 @@
|
|||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
<xsl:stylesheet version="1.0"
|
<xsl:stylesheet version="1.0"
|
||||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
xmlns:html="http://www.w3.org/1999/xhtml"
|
xmlns="http://www.w3.org/1999/xhtml"
|
||||||
xmlns:rtf="http://rtf2xml.sourceforge.net/"
|
xmlns:rtf="http://rtf2xml.sourceforge.net/"
|
||||||
xmlns:c="calibre"
|
xmlns:c="calibre"
|
||||||
extension-element-prefixes="c"
|
extension-element-prefixes="c"
|
||||||
@ -63,11 +63,16 @@
|
|||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template name = "para">
|
<xsl:template name = "para">
|
||||||
<xsl:if test = "normalize-space(.) or child::*">
|
<xsl:element name = "p">
|
||||||
<xsl:element name = "p">
|
<xsl:choose>
|
||||||
<xsl:call-template name = "para-content"/>
|
<xsl:when test = "normalize-space(.) or child::*">
|
||||||
</xsl:element>
|
<xsl:call-template name = "para-content"/>
|
||||||
</xsl:if>
|
</xsl:when>
|
||||||
|
<xsl:otherwise>
|
||||||
|
<xsl:text> </xsl:text>
|
||||||
|
</xsl:otherwise>
|
||||||
|
</xsl:choose>
|
||||||
|
</xsl:element>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template name = "para_off">
|
<xsl:template name = "para_off">
|
||||||
@ -149,7 +154,7 @@
|
|||||||
<xsl:template match="rtf:doc-information" mode="header">
|
<xsl:template match="rtf:doc-information" mode="header">
|
||||||
<link rel="stylesheet" type="text/css" href="styles.css"/>
|
<link rel="stylesheet" type="text/css" href="styles.css"/>
|
||||||
<xsl:if test="not(rtf:title)">
|
<xsl:if test="not(rtf:title)">
|
||||||
<title>unamed</title>
|
<title>unnamed</title>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
@ -445,7 +450,10 @@
|
|||||||
|
|
||||||
<xsl:template match = "rtf:field[@type='hyperlink']">
|
<xsl:template match = "rtf:field[@type='hyperlink']">
|
||||||
<xsl:element name ="a">
|
<xsl:element name ="a">
|
||||||
<xsl:attribute name = "href"><xsl:if test="not(contains(@link, '/'))">#</xsl:if><xsl:value-of select = "@link"/></xsl:attribute>
|
<xsl:attribute name = "href">
|
||||||
|
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
|
||||||
|
<xsl:value-of select = "@link"/>
|
||||||
|
</xsl:attribute>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
@ -49,6 +49,15 @@ class ANDROID(USBMS):
|
|||||||
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
|
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
|
||||||
0x70c6 : [0x226]
|
0x70c6 : [0x226]
|
||||||
},
|
},
|
||||||
|
# Freescale
|
||||||
|
0x15a2 : {
|
||||||
|
0x0c01 : [0x226]
|
||||||
|
},
|
||||||
|
|
||||||
|
# Alcatel
|
||||||
|
0x05c6 : {
|
||||||
|
0x9018 : [0x0226],
|
||||||
|
},
|
||||||
|
|
||||||
# Sony Ericsson
|
# Sony Ericsson
|
||||||
0xfce : {
|
0xfce : {
|
||||||
@ -139,7 +148,8 @@ class ANDROID(USBMS):
|
|||||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
|
||||||
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'VIZIO']
|
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
|
||||||
|
'VIZIO', 'GOOGLE', 'FREESCAL']
|
||||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||||
@ -150,7 +160,7 @@ class ANDROID(USBMS):
|
|||||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
|
||||||
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
|
||||||
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
|
||||||
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008']
|
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI']
|
||||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||||
|
@ -62,7 +62,7 @@ class DevicePlugin(Plugin):
|
|||||||
#: Icon for this device
|
#: Icon for this device
|
||||||
icon = I('reader.png')
|
icon = I('reader.png')
|
||||||
|
|
||||||
# Used by gui2.ui:annotations_fetched() and devices.kindle.driver:get_annotations()
|
# Encapsulates an annotation fetched from the device
|
||||||
UserAnnotation = namedtuple('Annotation','type, value')
|
UserAnnotation = namedtuple('Annotation','type, value')
|
||||||
|
|
||||||
#: GUI displays this as a message if not None. Useful if opening can take a
|
#: GUI displays this as a message if not None. Useful if opening can take a
|
||||||
|
@ -13,6 +13,8 @@ import datetime, os, re, sys, json, hashlib
|
|||||||
from calibre.devices.kindle.apnx import APNXBuilder
|
from calibre.devices.kindle.apnx import APNXBuilder
|
||||||
from calibre.devices.kindle.bookmark import Bookmark
|
from calibre.devices.kindle.bookmark import Bookmark
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre import strftime
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Notes on collections:
|
Notes on collections:
|
||||||
@ -164,6 +166,121 @@ class KINDLE(USBMS):
|
|||||||
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
|
# This returns as job.result in gui2.ui.annotations_fetched(self,job)
|
||||||
return bookmarked_books
|
return bookmarked_books
|
||||||
|
|
||||||
|
def generate_annotation_html(self, bookmark):
    """
    Render *bookmark* as a BeautifulSoup document containing a single
    <div class="user_annotations"> element.

    The div holds, in order: a bold span with the last-read position, a
    <br>, and one HTML snippet per entry of ``bookmark.user_notes`` taken
    in sorted key order. Highlighted text (any entry whose type is not
    'Note') is wrapped in <i>.
    """
    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString

    last_read = bookmark.last_read_location
    when = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
    percent = bookmark.percent_read

    soup = BeautifulSoup()
    container = Tag(soup, 'div')
    container['class'] = 'user_annotations'

    # Header: last-read position, worded differently for PDFs
    header = Tag(soup, 'span')
    header['style'] = 'font-weight:bold'
    if bookmark.book_format == 'pdf':
        header_template = _("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)")
    else:
        header_template = _("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)")
    header_values = dict(time=strftime(u'%x', when.timetuple()),
                         loc=last_read,
                         pr=percent)
    header.insert(0, NavigableString(header_template % header_values))

    container.insert(0, header)
    container.insert(1, Tag(soup, 'br'))
    next_slot = 2

    if bookmark.user_notes:
        notes = bookmark.user_notes
        snippets = []

        # First build every snippet (sorted by location), then insert them
        for key in sorted(notes):
            note = notes[key]
            if note['text']:
                template = _('<b>Location %(dl)d • %(typ)s</b><br />%(text)s<br />')
                if note['type'] == 'Note':
                    body = note['text']
                else:
                    # Highlights are italicized
                    body = '<i>%s</i>' % note['text']
                values = dict(dl=note['displayed_location'],
                              typ=note['type'],
                              text=body)
            else:
                if bookmark.book_format == 'pdf':
                    template = _('<b>Page %(dl)d • %(typ)s</b><br />')
                else:
                    template = _('<b>Location %(dl)d • %(typ)s</b><br />')
                values = dict(dl=note['displayed_location'],
                              typ=note['type'])
            snippets.append(template % values)

        for offset, snippet in enumerate(snippets):
            container.insert(next_slot + offset, snippet)

    soup.insert(0, container)
    return soup
|
||||||
|
|
||||||
|
|
||||||
|
def add_annotation_to_library(self, db, db_id, annotation):
    """
    Merge a fetched Kindle annotation into the calibre library.

    *annotation* carries a ``type`` and a ``value``:

    * 'kindle_bookmark': the rendered annotations replace any earlier
      annotations block at the end of the book's comments, and the
      bookmark file is attached to the book as an extra format.
    * 'kindle_clippings': the clippings file is attached to the existing
      "My Clippings" book, or a new book with that title is created.

    Any other type is ignored.
    """
    from calibre.ebooks.BeautifulSoup import Tag

    if annotation.type == 'kindle_bookmark':
        bookmark = annotation.value
        mi = db.get_metadata(db_id, index_is_id=True)
        notes_soup = self.generate_annotation_html(bookmark)

        if not mi.comments:
            mi.comments = unicode(notes_soup.prettify())
        else:
            # Both offsets are measured on the comments as stored, before
            # either truncation below is applied.
            div_at = mi.comments.find('<div class="user_annotations">')
            hr_at = mi.comments.find('<hr class="annotations_divider" />')
            if div_at >= 0:
                mi.comments = mi.comments[:div_at]
            if hr_at >= 0:
                mi.comments = mi.comments[:hr_at]

            # Books carrying one of these tags are left untouched
            if set(mi.tags).intersection(set(['Catalog', 'Clippings'])):
                return

            # Separate the surviving comments from the annotations
            if mi.comments:
                divider = Tag(notes_soup, 'hr')
                divider['class'] = 'annotations_divider'
                notes_soup.insert(0, divider)

            mi.comments += unicode(notes_soup.prettify())

        # Update library comments
        db.set_comment(db_id, mi.comments)

        # Add bookmark file to db_id
        db.add_format_with_hooks(db_id, bookmark.bookmark_extension,
                                 bookmark.path, index_is_id=True)

    elif annotation.type == 'kindle_clippings':
        clippings = annotation.value
        last_update = 'Last modified %s' % strftime(u'%x %X', clippings['timestamp'].timetuple())
        found = list(db.data.search_getting_ids('title:"My Clippings"', ''))

        if not found:
            # No 'My Clippings' book yet: create it
            mi = MetaInformation('My Clippings', authors = ['Kindle'])
            mi.tags = ['Clippings']
            mi.comments = last_update
            db.add_books([clippings['path']], ['txt'], [mi])
        else:
            book_id = found[0]
            db.add_format_with_hooks(book_id, 'TXT', clippings['path'],
                                     index_is_id=True)
            mi = db.get_metadata(book_id, index_is_id=True)
            mi.comments = last_update
            db.set_metadata(book_id, mi)
|
||||||
|
|
||||||
class KINDLE2(KINDLE):
|
class KINDLE2(KINDLE):
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@ from calibre.devices.usbms.driver import USBMS, debug_print
|
|||||||
from calibre import prints
|
from calibre import prints
|
||||||
from calibre.devices.usbms.books import CollectionsBookList
|
from calibre.devices.usbms.books import CollectionsBookList
|
||||||
from calibre.utils.magick.draw import save_cover_data_to
|
from calibre.utils.magick.draw import save_cover_data_to
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
class KOBO(USBMS):
|
class KOBO(USBMS):
|
||||||
|
|
||||||
@ -76,6 +77,11 @@ class KOBO(USBMS):
|
|||||||
self.book_class = Book
|
self.book_class = Book
|
||||||
self.dbversion = 7
|
self.dbversion = 7
|
||||||
|
|
||||||
|
def create_annotations_path(self, mdata, device_path=None):
    """
    Return *device_path* unchanged when one is supplied; otherwise fall
    back to the path computed by USBMS for *mdata*.
    """
    return device_path or USBMS.create_annotations_path(self, mdata)
|
||||||
|
|
||||||
def books(self, oncard=None, end_session=True):
|
def books(self, oncard=None, end_session=True):
|
||||||
from calibre.ebooks.metadata.meta import path_to_ext
|
from calibre.ebooks.metadata.meta import path_to_ext
|
||||||
|
|
||||||
@ -750,9 +756,12 @@ class KOBO(USBMS):
|
|||||||
|
|
||||||
blists = {}
|
blists = {}
|
||||||
for i in paths:
|
for i in paths:
|
||||||
if booklists[i] is not None:
|
try:
|
||||||
#debug_print('Booklist: ', i)
|
if booklists[i] is not None:
|
||||||
blists[i] = booklists[i]
|
#debug_print('Booklist: ', i)
|
||||||
|
blists[i] = booklists[i]
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
opts = self.settings()
|
opts = self.settings()
|
||||||
if opts.extra_customization:
|
if opts.extra_customization:
|
||||||
collections = [x.lower().strip() for x in
|
collections = [x.lower().strip() for x in
|
||||||
@ -865,3 +874,21 @@ class KOBO(USBMS):
|
|||||||
else:
|
else:
|
||||||
debug_print("ImageID could not be retreived from the database")
|
debug_print("ImageID could not be retreived from the database")
|
||||||
|
|
||||||
|
def prepare_addable_books(self, paths):
    '''
    The Kobo supports an encrypted epub referred to as a kepub
    Unfortunately Kobo decided to put the files on the device
    with no file extension. I just hope that decision causes
    them as much grief as it does me :-)

    This has to make a temporary copy of the book files with a
    epub extension to allow Calibre's normal processing to
    deal with the file appropriately

    :param paths: list of file paths; entries whose path contains
                  'kepub' are replaced in place by the path of the
                  temporary copy.
    :return: the same (mutated) list.
    '''
    for idx, path in enumerate(paths):
        if path.find('kepub') >= 0:
            # Binary mode: the book is not text, and text mode would
            # translate newline bytes on some platforms and corrupt it.
            with open(path, 'rb') as src:
                tf = PersistentTemporaryFile(suffix='.epub')
                try:
                    tf.write(src.read())
                finally:
                    # Close so the copy is flushed to disk before anything
                    # else opens it by name.
                    tf.close()
                paths[idx] = tf.name
    return paths
|
||||||
|
@ -1068,6 +1068,12 @@ class Device(DeviceConfig, DevicePlugin):
|
|||||||
'''
|
'''
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
def add_annotation_to_library(self, db, db_id, annotation):
    '''
    Hook for adding an annotation to the calibre library.

    This implementation does nothing and returns None; device drivers
    may override it.
    '''
    return None
|
||||||
|
|
||||||
def create_upload_path(self, path, mdata, fname, create_dirs=True):
|
def create_upload_path(self, path, mdata, fname, create_dirs=True):
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
maxlen = self.MAX_PATH_LEN
|
maxlen = self.MAX_PATH_LEN
|
||||||
@ -1147,3 +1153,6 @@ class Device(DeviceConfig, DevicePlugin):
|
|||||||
os.makedirs(filedir)
|
os.makedirs(filedir)
|
||||||
|
|
||||||
return filepath
|
return filepath
|
||||||
|
|
||||||
|
def create_annotations_path(self, mdata, device_path=None):
    '''
    Build a path template for the annotations file of the book described
    by *mdata*, rooted at the placeholder directory '/<storage>'.

    *device_path* is accepted for interface compatibility and is not
    used here. No directories are created.
    '''
    storage_root = os.path.abspath('/<storage>')
    return self.create_upload_path(storage_root, mdata, 'x.bookmark',
                                   create_dirs=False)
|
||||||
|
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
|
|||||||
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
|
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
|
||||||
from calibre.ebooks.chm.reader import CHMReader
|
from calibre.ebooks.chm.reader import CHMReader
|
||||||
log.debug('Opening CHM file')
|
log.debug('Opening CHM file')
|
||||||
rdr = CHMReader(chm_path, log, self.opts)
|
rdr = CHMReader(chm_path, log, input_encoding=self.opts.input_encoding)
|
||||||
log.debug('Extracting CHM to %s' % output_dir)
|
log.debug('Extracting CHM to %s' % output_dir)
|
||||||
rdr.extract_content(output_dir, debug_dump=debug_dump)
|
rdr.extract_content(output_dir, debug_dump=debug_dump)
|
||||||
self._chm_reader = rdr
|
self._chm_reader = rdr
|
||||||
|
@ -40,14 +40,14 @@ class CHMError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
class CHMReader(CHMFile):
|
class CHMReader(CHMFile):
|
||||||
def __init__(self, input, log, opts):
|
def __init__(self, input, log, input_encoding=None):
|
||||||
CHMFile.__init__(self)
|
CHMFile.__init__(self)
|
||||||
if isinstance(input, unicode):
|
if isinstance(input, unicode):
|
||||||
input = input.encode(filesystem_encoding)
|
input = input.encode(filesystem_encoding)
|
||||||
if not self.LoadCHM(input):
|
if not self.LoadCHM(input):
|
||||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||||
self.log = log
|
self.log = log
|
||||||
self.opts = opts
|
self.input_encoding = input_encoding
|
||||||
self._sourcechm = input
|
self._sourcechm = input
|
||||||
self._contents = None
|
self._contents = None
|
||||||
self._playorder = 0
|
self._playorder = 0
|
||||||
@ -156,8 +156,8 @@ class CHMReader(CHMFile):
|
|||||||
break
|
break
|
||||||
|
|
||||||
def _reformat(self, data, htmlpath):
|
def _reformat(self, data, htmlpath):
|
||||||
if self.opts.input_encoding:
|
if self.input_encoding:
|
||||||
data = data.decode(self.opts.input_encoding)
|
data = data.decode(self.input_encoding)
|
||||||
try:
|
try:
|
||||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||||
soup = BeautifulSoup(data)
|
soup = BeautifulSoup(data)
|
||||||
|
@ -693,6 +693,8 @@ OptionRecommendation(name='sr3_replace',
|
|||||||
def unarchive(self, path, tdir):
|
def unarchive(self, path, tdir):
|
||||||
extract(path, tdir)
|
extract(path, tdir)
|
||||||
files = list(walk(tdir))
|
files = list(walk(tdir))
|
||||||
|
files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
|
||||||
|
for f in files]
|
||||||
from calibre.customize.ui import available_input_formats
|
from calibre.customize.ui import available_input_formats
|
||||||
fmts = available_input_formats()
|
fmts = available_input_formats()
|
||||||
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
|
for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
|
||||||
|
@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin):
|
|||||||
html = 'index.xhtml'
|
html = 'index.xhtml'
|
||||||
with open(html, 'wb') as f:
|
with open(html, 'wb') as f:
|
||||||
res = transform.tostring(result)
|
res = transform.tostring(result)
|
||||||
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||||
|
#clean multiple \n
|
||||||
|
res = re.sub('\n+', '\n', res)
|
||||||
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
||||||
res = re.sub('\s*<body>', '<body>', res)
|
# res = re.sub('\s*<body>', '<body>', res)
|
||||||
res = re.sub('(?<=\n)\n{2}',
|
# res = re.sub('(?<=\n)\n{2}',
|
||||||
u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||||
f.write(res)
|
f.write(res)
|
||||||
self.write_inline_css(inline_class, border_styles)
|
self.write_inline_css(inline_class, border_styles)
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
@ -376,13 +376,13 @@ class ParseRtf:
|
|||||||
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
msg += 'self.__run_level is "%s"\n' % self.__run_level
|
||||||
raise RtfInvalidCodeException, msg
|
raise RtfInvalidCodeException, msg
|
||||||
if self.__run_level > 1:
|
if self.__run_level > 1:
|
||||||
sys.stderr.write(_('File could be older RTF...\n'))
|
sys.stderr.write('File could be older RTF...\n')
|
||||||
if found_destination:
|
if found_destination:
|
||||||
if self.__run_level > 1:
|
if self.__run_level > 1:
|
||||||
sys.stderr.write(_(
|
sys.stderr.write(
|
||||||
'File also has newer RTF.\n'
|
'File also has newer RTF.\n'
|
||||||
'Will do the best to convert.\n'
|
'Will do the best to convert.\n'
|
||||||
))
|
)
|
||||||
add_brackets_obj = add_brackets.AddBrackets(
|
add_brackets_obj = add_brackets.AddBrackets(
|
||||||
in_file = self.__temp_file,
|
in_file = self.__temp_file,
|
||||||
bug_handler = RtfInvalidCodeException,
|
bug_handler = RtfInvalidCodeException,
|
||||||
|
@ -11,11 +11,11 @@
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, tempfile
|
import sys, os, tempfile
|
||||||
|
|
||||||
from calibre.ebooks.rtf2xml import copy, check_brackets
|
from calibre.ebooks.rtf2xml import copy, check_brackets
|
||||||
# note to self. This is the first module in which I use tempfile. A good idea?
|
# note to self. This is the first module in which I use tempfile. A good idea?
|
||||||
"""
|
|
||||||
"""
|
|
||||||
class AddBrackets:
|
class AddBrackets:
|
||||||
"""
|
"""
|
||||||
Add brackets for old RTF.
|
Add brackets for old RTF.
|
||||||
@ -41,6 +41,7 @@ class AddBrackets:
|
|||||||
self.__copy = copy
|
self.__copy = copy
|
||||||
self.__write_to = tempfile.mktemp()
|
self.__write_to = tempfile.mktemp()
|
||||||
self.__run_level = run_level
|
self.__run_level = run_level
|
||||||
|
|
||||||
def __initiate_values(self):
|
def __initiate_values(self):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
@ -82,14 +83,16 @@ class AddBrackets:
|
|||||||
'cw<ci<subscript_' ,
|
'cw<ci<subscript_' ,
|
||||||
'cw<ci<superscrip',
|
'cw<ci<superscrip',
|
||||||
'cw<ci<underlined' ,
|
'cw<ci<underlined' ,
|
||||||
'cw<ul<underlined' ,
|
# 'cw<ul<underlined' ,
|
||||||
]
|
]
|
||||||
|
|
||||||
def __before_body_func(self, line):
|
def __before_body_func(self, line):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
if self.__token_info == 'mi<mk<body-open_':
|
if self.__token_info == 'mi<mk<body-open_':
|
||||||
self.__state = 'in_body'
|
self.__state = 'in_body'
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __in_body_func(self, line):
|
def __in_body_func(self, line):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
@ -108,6 +111,7 @@ class AddBrackets:
|
|||||||
self.__state = 'after_control_word'
|
self.__state = 'after_control_word'
|
||||||
else:
|
else:
|
||||||
self.__write_obj.write(line)
|
self.__write_obj.write(line)
|
||||||
|
|
||||||
def __after_control_word_func(self, line):
|
def __after_control_word_func(self, line):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
@ -122,6 +126,7 @@ class AddBrackets:
|
|||||||
self.__ignore_count = self.__ob_count
|
self.__ignore_count = self.__ob_count
|
||||||
else:
|
else:
|
||||||
self.__state = 'in_body'
|
self.__state = 'in_body'
|
||||||
|
|
||||||
def __write_group(self):
|
def __write_group(self):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
@ -141,6 +146,7 @@ class AddBrackets:
|
|||||||
self.__write_obj.write(inline_string)
|
self.__write_obj.write(inline_string)
|
||||||
self.__open_bracket = 1
|
self.__open_bracket = 1
|
||||||
self.__temp_group = []
|
self.__temp_group = []
|
||||||
|
|
||||||
def __change_permanent_group(self):
|
def __change_permanent_group(self):
|
||||||
"""
|
"""
|
||||||
use temp group to change permanent group
|
use temp group to change permanent group
|
||||||
@ -150,6 +156,7 @@ class AddBrackets:
|
|||||||
if token_info in self.__accept:
|
if token_info in self.__accept:
|
||||||
att = line[20:-1]
|
att = line[20:-1]
|
||||||
self.__inline[token_info] = att
|
self.__inline[token_info] = att
|
||||||
|
|
||||||
def __ignore_func(self, line):
|
def __ignore_func(self, line):
|
||||||
"""
|
"""
|
||||||
Don't add any brackets while inside of brackets RTF has already
|
Don't add any brackets while inside of brackets RTF has already
|
||||||
@ -159,12 +166,14 @@ class AddBrackets:
|
|||||||
if self.__token_info == 'cb<nu<clos-brack'and\
|
if self.__token_info == 'cb<nu<clos-brack'and\
|
||||||
self.__cb_count == self.__ignore_count:
|
self.__cb_count == self.__ignore_count:
|
||||||
self.__state = 'in_body'
|
self.__state = 'in_body'
|
||||||
|
|
||||||
def __check_brackets(self, in_file):
|
def __check_brackets(self, in_file):
|
||||||
self.__check_brack_obj = check_brackets.CheckBrackets\
|
self.__check_brack_obj = check_brackets.CheckBrackets\
|
||||||
(file = in_file)
|
(file = in_file)
|
||||||
good_br = self.__check_brack_obj.check_brackets()[0]
|
good_br = self.__check_brack_obj.check_brackets()[0]
|
||||||
if not good_br:
|
if not good_br:
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
def add_brackets(self):
|
def add_brackets(self):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
|
@ -5,14 +5,57 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, datetime
|
|
||||||
|
|
||||||
from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
|
from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
|
||||||
|
|
||||||
from calibre.gui2 import error_dialog
|
from calibre.gui2 import error_dialog
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.gui2.actions import InterfaceAction
|
from calibre.gui2.actions import InterfaceAction
|
||||||
|
from calibre.devices.usbms.device import Device
|
||||||
|
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||||
|
|
||||||
|
class Updater(QThread): # {{{
    """
    Worker thread that merges fetched annotations into the library.

    For each book id in ``annotation_map`` the device driver's
    ``add_annotation_to_library`` is called. Failures are collected per
    book id in ``self.errors`` rather than aborting the run. A modal
    progress dialog is shown while the thread works; cancelling it stops
    the loop before the next book.
    """

    # Emitted after each processed book with its index in the iteration
    update_progress = pyqtSignal(int)
    # Emitted once the loop has finished or was cancelled
    update_done = pyqtSignal()

    def __init__(self, parent, db, device, annotation_map, done_callback):
        """
        :param parent: Qt parent, also used as parent of the progress dialog
        :param db: library database passed through to the device driver
        :param device: driver providing ``add_annotation_to_library``
        :param annotation_map: maps book id -> sequence whose first two
                               items are the annotation type and value
        :param done_callback: called with (book ids, errors) at the end
                              of ``run``
        """
        QThread.__init__(self, parent)
        self.errors = {}
        self.db = db
        self.keep_going = True
        self.pd = ProgressDialog(_('Merging user annotations into database'), '',
                0, len(annotation_map), parent=parent)

        self.device = device
        self.annotation_map = annotation_map
        self.done_callback = done_callback
        self.pd.canceled_signal.connect(self.canceled)
        self.pd.setModal(True)
        self.pd.show()
        # Queued connections: the signals are emitted from run(), the
        # dialog is updated via the event loop.
        self.update_progress.connect(self.pd.set_value,
                type=Qt.QueuedConnection)
        self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)

    def canceled(self):
        """Ask the loop in ``run`` to stop and hide the progress dialog."""
        self.keep_going = False
        self.pd.hide()

    def run(self):
        """Process every annotation, then report ids and errors."""
        for i, id_ in enumerate(self.annotation_map):
            if not self.keep_going:
                break
            bm = Device.UserAnnotation(self.annotation_map[id_][0],
                    self.annotation_map[id_][1])
            try:
                self.device.add_annotation_to_library(self.db, id_, bm)
            except Exception:
                # One bad book must not stop the rest, so keep the
                # traceback for the final report. Not a bare except:
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                import traceback
                self.errors[id_] = traceback.format_exc()
            self.update_progress.emit(i)
        self.update_done.emit()
        self.done_callback(self.annotation_map.keys(), self.errors)

# }}}
|
||||||
|
|
||||||
class FetchAnnotationsAction(InterfaceAction):
|
class FetchAnnotationsAction(InterfaceAction):
|
||||||
|
|
||||||
@ -41,13 +84,21 @@ class FetchAnnotationsAction(InterfaceAction):
|
|||||||
fmts.append(format.lower())
|
fmts.append(format.lower())
|
||||||
return fmts
|
return fmts
|
||||||
|
|
||||||
|
def get_device_path_from_id(id_):
    # Gather the entries for this library id from every device view
    # (main memory, then card A, then card B) and return the ``path`` of
    # the first one, or None when there are none.
    matches = []
    for location in ('memory', 'card_a', 'card_b'):
        model = getattr(self.gui, location+'_view').model()
        matches.extend(model.paths_for_db_ids(set([id_]), as_map=True)[id_])
    if not matches:
        return None
    return matches[0].path
|
||||||
|
|
||||||
def generate_annotation_paths(ids, db, device):
|
def generate_annotation_paths(ids, db, device):
|
||||||
# Generate path templates
|
# Generate path templates
|
||||||
# Individual storage mount points scanned/resolved in driver.get_annotations()
|
# Individual storage mount points scanned/resolved in driver.get_annotations()
|
||||||
path_map = {}
|
path_map = {}
|
||||||
for id in ids:
|
for id in ids:
|
||||||
|
path = get_device_path_from_id(id)
|
||||||
mi = db.get_metadata(id, index_is_id=True)
|
mi = db.get_metadata(id, index_is_id=True)
|
||||||
a_path = device.create_upload_path(os.path.abspath('/<storage>'), mi, 'x.bookmark', create_dirs=False)
|
a_path = device.create_annotations_path(mi, device_path=path)
|
||||||
path_map[id] = dict(path=a_path, fmts=get_formats(id))
|
path_map[id] = dict(path=a_path, fmts=get_formats(id))
|
||||||
return path_map
|
return path_map
|
||||||
|
|
||||||
@ -78,166 +129,6 @@ class FetchAnnotationsAction(InterfaceAction):
|
|||||||
path_map)
|
path_map)
|
||||||
|
|
||||||
def annotations_fetched(self, job):
|
def annotations_fetched(self, job):
|
||||||
from calibre.devices.usbms.device import Device
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
|
||||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
|
||||||
from calibre.library.cli import do_add_format
|
|
||||||
|
|
||||||
class Updater(QThread): # {{{
|
|
||||||
|
|
||||||
update_progress = pyqtSignal(int)
|
|
||||||
update_done = pyqtSignal()
|
|
||||||
FINISHED_READING_PCT_THRESHOLD = 96
|
|
||||||
|
|
||||||
def __init__(self, parent, db, annotation_map, done_callback):
|
|
||||||
QThread.__init__(self, parent)
|
|
||||||
self.db = db
|
|
||||||
self.pd = ProgressDialog(_('Merging user annotations into database'), '',
|
|
||||||
0, len(job.result), parent=parent)
|
|
||||||
|
|
||||||
self.am = annotation_map
|
|
||||||
self.done_callback = done_callback
|
|
||||||
self.pd.canceled_signal.connect(self.canceled)
|
|
||||||
self.pd.setModal(True)
|
|
||||||
self.pd.show()
|
|
||||||
self.update_progress.connect(self.pd.set_value,
|
|
||||||
type=Qt.QueuedConnection)
|
|
||||||
self.update_done.connect(self.pd.hide, type=Qt.QueuedConnection)
|
|
||||||
|
|
||||||
def generate_annotation_html(self, bookmark):
|
|
||||||
# Returns <div class="user_annotations"> ... </div>
|
|
||||||
last_read_location = bookmark.last_read_location
|
|
||||||
timestamp = datetime.datetime.utcfromtimestamp(bookmark.timestamp)
|
|
||||||
percent_read = bookmark.percent_read
|
|
||||||
|
|
||||||
ka_soup = BeautifulSoup()
|
|
||||||
dtc = 0
|
|
||||||
divTag = Tag(ka_soup,'div')
|
|
||||||
divTag['class'] = 'user_annotations'
|
|
||||||
|
|
||||||
# Add the last-read location
|
|
||||||
spanTag = Tag(ka_soup, 'span')
|
|
||||||
spanTag['style'] = 'font-weight:bold'
|
|
||||||
if bookmark.book_format == 'pdf':
|
|
||||||
spanTag.insert(0,NavigableString(
|
|
||||||
_("%(time)s<br />Last Page Read: %(loc)d (%(pr)d%%)") % \
|
|
||||||
dict(time=strftime(u'%x', timestamp.timetuple()),
|
|
||||||
loc=last_read_location,
|
|
||||||
pr=percent_read)))
|
|
||||||
else:
|
|
||||||
spanTag.insert(0,NavigableString(
|
|
||||||
_("%(time)s<br />Last Page Read: Location %(loc)d (%(pr)d%%)") % \
|
|
||||||
dict(time=strftime(u'%x', timestamp.timetuple()),
|
|
||||||
loc=last_read_location,
|
|
||||||
pr=percent_read)))
|
|
||||||
|
|
||||||
divTag.insert(dtc, spanTag)
|
|
||||||
dtc += 1
|
|
||||||
divTag.insert(dtc, Tag(ka_soup,'br'))
|
|
||||||
dtc += 1
|
|
||||||
|
|
||||||
if bookmark.user_notes:
|
|
||||||
user_notes = bookmark.user_notes
|
|
||||||
annotations = []
|
|
||||||
|
|
||||||
# Add the annotations sorted by location
|
|
||||||
# Italicize highlighted text
|
|
||||||
for location in sorted(user_notes):
|
|
||||||
if user_notes[location]['text']:
|
|
||||||
annotations.append(
|
|
||||||
_('<b>Location %(dl)d • %(typ)s</b><br />%(text)s<br />') % \
|
|
||||||
dict(dl=user_notes[location]['displayed_location'],
|
|
||||||
typ=user_notes[location]['type'],
|
|
||||||
text=(user_notes[location]['text'] if \
|
|
||||||
user_notes[location]['type'] == 'Note' else \
|
|
||||||
'<i>%s</i>' % user_notes[location]['text'])))
|
|
||||||
else:
|
|
||||||
if bookmark.book_format == 'pdf':
|
|
||||||
annotations.append(
|
|
||||||
_('<b>Page %(dl)d • %(typ)s</b><br />') % \
|
|
||||||
dict(dl=user_notes[location]['displayed_location'],
|
|
||||||
typ=user_notes[location]['type']))
|
|
||||||
else:
|
|
||||||
annotations.append(
|
|
||||||
_('<b>Location %(dl)d • %(typ)s</b><br />') % \
|
|
||||||
dict(dl=user_notes[location]['displayed_location'],
|
|
||||||
typ=user_notes[location]['type']))
|
|
||||||
|
|
||||||
for annotation in annotations:
|
|
||||||
divTag.insert(dtc, annotation)
|
|
||||||
dtc += 1
|
|
||||||
|
|
||||||
ka_soup.insert(0,divTag)
|
|
||||||
return ka_soup
|
|
||||||
|
|
||||||
'''
|
|
||||||
def mark_book_as_read(self,id):
|
|
||||||
read_tag = gprefs.get('catalog_epub_mobi_read_tag')
|
|
||||||
if read_tag:
|
|
||||||
self.db.set_tags(id, [read_tag], append=True)
|
|
||||||
'''
|
|
||||||
|
|
||||||
def canceled(self):
|
|
||||||
self.pd.hide()
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
ignore_tags = set(['Catalog','Clippings'])
|
|
||||||
for (i, id) in enumerate(self.am):
|
|
||||||
bm = Device.UserAnnotation(self.am[id][0],self.am[id][1])
|
|
||||||
if bm.type == 'kindle_bookmark':
|
|
||||||
mi = self.db.get_metadata(id, index_is_id=True)
|
|
||||||
user_notes_soup = self.generate_annotation_html(bm.value)
|
|
||||||
if mi.comments:
|
|
||||||
a_offset = mi.comments.find('<div class="user_annotations">')
|
|
||||||
ad_offset = mi.comments.find('<hr class="annotations_divider" />')
|
|
||||||
|
|
||||||
if a_offset >= 0:
|
|
||||||
mi.comments = mi.comments[:a_offset]
|
|
||||||
if ad_offset >= 0:
|
|
||||||
mi.comments = mi.comments[:ad_offset]
|
|
||||||
if set(mi.tags).intersection(ignore_tags):
|
|
||||||
continue
|
|
||||||
if mi.comments:
|
|
||||||
hrTag = Tag(user_notes_soup,'hr')
|
|
||||||
hrTag['class'] = 'annotations_divider'
|
|
||||||
user_notes_soup.insert(0,hrTag)
|
|
||||||
|
|
||||||
mi.comments += user_notes_soup.prettify()
|
|
||||||
else:
|
|
||||||
mi.comments = unicode(user_notes_soup.prettify())
|
|
||||||
# Update library comments
|
|
||||||
self.db.set_comment(id, mi.comments)
|
|
||||||
|
|
||||||
'''
|
|
||||||
# Update 'read' tag except for Catalogs/Clippings
|
|
||||||
if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD:
|
|
||||||
if not set(mi.tags).intersection(ignore_tags):
|
|
||||||
self.mark_book_as_read(id)
|
|
||||||
'''
|
|
||||||
|
|
||||||
# Add bookmark file to id
|
|
||||||
self.db.add_format_with_hooks(id, bm.value.bookmark_extension,
|
|
||||||
bm.value.path, index_is_id=True)
|
|
||||||
self.update_progress.emit(i)
|
|
||||||
elif bm.type == 'kindle_clippings':
|
|
||||||
# Find 'My Clippings' author=Kindle in database, or add
|
|
||||||
last_update = 'Last modified %s' % strftime(u'%x %X',bm.value['timestamp'].timetuple())
|
|
||||||
mc_id = list(db.data.parse('title:"My Clippings"'))
|
|
||||||
if mc_id:
|
|
||||||
do_add_format(self.db, mc_id[0], 'TXT', bm.value['path'])
|
|
||||||
mi = self.db.get_metadata(mc_id[0], index_is_id=True)
|
|
||||||
mi.comments = last_update
|
|
||||||
self.db.set_metadata(mc_id[0], mi)
|
|
||||||
else:
|
|
||||||
mi = MetaInformation('My Clippings', authors = ['Kindle'])
|
|
||||||
mi.tags = ['Clippings']
|
|
||||||
mi.comments = last_update
|
|
||||||
self.db.add_books([bm.value['path']], ['txt'], [mi])
|
|
||||||
|
|
||||||
self.update_done.emit()
|
|
||||||
self.done_callback(self.am.keys())
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
if not job.result: return
|
if not job.result: return
|
||||||
|
|
||||||
@ -246,9 +137,25 @@ class FetchAnnotationsAction(InterfaceAction):
|
|||||||
_('User annotations generated from main library only'),
|
_('User annotations generated from main library only'),
|
||||||
show=True)
|
show=True)
|
||||||
db = self.gui.library_view.model().db
|
db = self.gui.library_view.model().db
|
||||||
|
device = self.gui.device_manager.device
|
||||||
|
|
||||||
self.__annotation_updater = Updater(self.gui, db, job.result,
|
self.__annotation_updater = Updater(self.gui, db, device, job.result,
|
||||||
self.Dispatcher(self.gui.library_view.model().refresh_ids))
|
self.Dispatcher(self.annotations_updated))
|
||||||
self.__annotation_updater.start()
|
self.__annotation_updater.start()
|
||||||
|
|
||||||
|
def annotations_updated(self, ids, errors):
|
||||||
|
self.gui.library_view.model().refresh_ids(ids)
|
||||||
|
if errors:
|
||||||
|
db = self.gui.library_view.model().db
|
||||||
|
entries = []
|
||||||
|
for id_, tb in errors.iteritems():
|
||||||
|
title = id_
|
||||||
|
if isinstance(id_, type(1)):
|
||||||
|
title = db.title(id_, index_is_id=True)
|
||||||
|
entries.extend([title, tb, ''])
|
||||||
|
error_dialog(self.gui, _('Some errors'),
|
||||||
|
_('Could not fetch annotations for some books. Click '
|
||||||
|
'show details to see which ones.'),
|
||||||
|
det_msg='\n'.join(entries), show=True)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1239,11 +1239,14 @@ class DeviceBooksModel(BooksModel): # {{{
|
|||||||
def paths(self, rows):
|
def paths(self, rows):
|
||||||
return [self.db[self.map[r.row()]].path for r in rows ]
|
return [self.db[self.map[r.row()]].path for r in rows ]
|
||||||
|
|
||||||
def paths_for_db_ids(self, db_ids):
|
def paths_for_db_ids(self, db_ids, as_map=False):
|
||||||
res = []
|
res = defaultdict(list) if as_map else []
|
||||||
for r,b in enumerate(self.db):
|
for r,b in enumerate(self.db):
|
||||||
if b.application_id in db_ids:
|
if b.application_id in db_ids:
|
||||||
res.append((r,b))
|
if as_map:
|
||||||
|
res[b.application_id].append(b)
|
||||||
|
else:
|
||||||
|
res.append((r,b))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def get_collections_with_ids(self):
|
def get_collections_with_ids(self):
|
||||||
|
@ -242,6 +242,10 @@ Replace ``192.168.1.2`` with the local IP address of the computer running |app|.
|
|||||||
|
|
||||||
If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout.
|
If you get timeout errors while browsing the calibre catalog in Stanza, try increasing the connection timeout value in the stanza settings. Go to Info->Settings and increase the value of Download Timeout.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
As of iOS version 5 Stanza no longer works on Apple devices. Alternatives to Stanza are discussed `here <http://www.mobileread.com/forums/showthread.php?t=152789>`_.
|
||||||
|
|
||||||
|
|
||||||
Using iBooks
|
Using iBooks
|
||||||
**************
|
**************
|
||||||
|
|
||||||
@ -251,7 +255,7 @@ Start the Safari browser and type in the IP address and port of the computer run
|
|||||||
|
|
||||||
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
|
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
|
||||||
|
|
||||||
You wills ee a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
|
You will see a list of books in Safari, just click on the epub link for whichever book you want to read, Safari will then prompt you to open it with iBooks.
|
||||||
|
|
||||||
|
|
||||||
With the USB cable + iTunes
|
With the USB cable + iTunes
|
||||||
|
Loading…
x
Reference in New Issue
Block a user