Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-06-23 15:30:45 -04:00

Merge from trunk (commit b7f2bb23df)

@@ -19,6 +19,81 @@
# new recipes:
#   - title:

- version: 0.8.32
  date: 2011-12-23

  new features:
    - title: "Linux: When deleting books, send them to the recycle bin, instead of permanently deleting. This is the same behavior as on Windows and OS X."

    - title: "Add a checkbox to allow users to disable the popup that asks if books should be auto-converted before sending to device"

    - title: "Drivers for Droid Razr, Samsung GT-I9003 and Bookeen Odyssey"
      tickets: [906356, 906056, 905862]

    - title: "Allow passing multiple filenames as command line arguments to calibre, to add multiple books."
      tickets: [907968]

  bug fixes:
    - title: "MOBI Output: Fix regression in 0.8.30 that caused the use of hidden heading elements for the TOC to generate links in the wrong place."
      tickets: [907156]

    - title: "EPUB Output: Ensure directories have the correct permission bits set when unzipping an EPUB with unzip on Unix"

    - title: "Fix the bottom-most shortcuts in the keyboard shortcuts list for the viewer not being editable"

    - title: "EPUB Output: Fix handling of self-closing <audio> tags."
      tickets: [906521]

    - title: "MOBI Input: Map invalid <o:p> tags to <p> tags before parsing, to handle broken nesting."
      tickets: [905715]

    - title: "Conversion pipeline: HTML5 parsing: Fix handling of XML namespaces. Fixes a regression in 0.8.30 that caused some articles in some news downloads to appear blank when viewed in Adobe Digital Editions based readers"

    - title: "Get Books: Gandalf store, fix price and cover detection"

    - title: "EPUB Output: Fix the Flatten filenames option in EPUB Output causing duplicated manifest ids in rare cases."
      tickets: [905692]

    - title: "When adding books via ISBN, show the user the list of invalid ISBNs that will be ignored, if any, before starting the add operation."
      tickets: [905690]

    - title: "Fix the unsmarten punctuation conversion option, which was broken in 0.8.31."
      tickets: [905596]

    - title: "Fix broken evaluation of composite columns in save-to-disk"

  improved recipes:
    - Cosmopolitan UK
    - Hindustan Times
    - HVG
    - moneynews.com
    - Ming Pao
    - Glasgow Herald
    - Times of India
    - Focus Magazine
    - Hacker News
    - Independent
    - Sueddeutsche

  new recipes:
    - title: Prospect Magazine UK
      author: Barty and duoloz

    - title: Elet es Irodalom and NOL
      author: Bigpapa

    - title: Salonica Press News
      author: SteliosGero

    - title: Echo Online
      author: Armin Geller

    - title: Various Polish news sources
      author: fenuks

    - title: Various Italian news sources
      author: faber1971

- version: 0.8.31
  date: 2011-12-16
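Every recipe touched below subclasses calibre's BasicNewsRecipe. For orientation, here is a minimal sketch of that pattern; the class name, feed title and URL are placeholders, not a real source:

    # Minimal calibre recipe sketch; the feed below is a placeholder, not a real source.
    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleFeed(BasicNewsRecipe):
        title                 = u'Example Feed'   # book title shown in the calibre library
        language              = 'en'
        oldest_article        = 7                 # fetch articles up to 7 days old
        max_articles_per_feed = 100
        no_stylesheets        = True              # drop the site's CSS for cleaner output
        feeds = [(u'All articles', u'http://example.com/rss.xml')]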
@@ -1,19 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re

class Adventure_zone(BasicNewsRecipe):
    title = u'Adventure Zone'
    __author__ = 'fenuks'
    description = 'Adventure zone - adventure games from A to Z'
    category = 'games'
    language = 'pl'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    oldest_article = 20
    max_articles_per_feed = 100
    use_embedded_content=False
    preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
    remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
    remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
    remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
    remove_tags_after= dict(id='comments')
    extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]

    def parse_feeds (self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
        tag=soup.find(name='channel')
        titles=[]
        for r in tag.findAll(name='image'):
            r.extract()
        art=tag.findAll(name='item')
        for i in art:
            titles.append(i.title.string)
        for feed in feeds:
            for article in feed.articles[:]:
                article.title=titles[feed.articles.index(article)]
        return feeds

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
        cover=soup.find(id='box_OstatninumerAZ')
@@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):

    def skip_ad_pages(self, soup):
        skip_tag = soup.body.findAll(name='a')
        if skip_tag is not None:
            for r in skip_tag:
                if 'articles.php?' in r['href']:
                    if r.strong is not None:
                        word=r.strong.string
                        if ('zapowied' or 'recenzj') in word:
                            return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
                        else:
                            None

    def print_version(self, url):
        return url.replace('news.php?readmore', 'print.php?type=N&item_id')

        skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
        skip_tag = skip_tag.findAll(name='a')
        for r in skip_tag:
            if r.strong:
                word=r.strong.string
                if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
                    return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
@@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AstroNEWS(BasicNewsRecipe):
    title = u'AstroNEWS'
    __author__ = 'fenuks'
@@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    auto_cleanup = True
    #extra_css= 'table {text-align: left;}'
    no_stylesheets=True
    cover_url='http://news.astronet.pl/img/logo_news.jpg'
    # no_stylesheets= True
    remove_tags=[dict(name='hr')]
    feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]

    def print_version(self, url):
        return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')

    def preprocess_html(self, soup):
        for item in soup.findAll(align=True):
            del item['align']
        return soup
recipes/biolog_pl.recipe (new file, 19 lines)
@@ -0,0 +1,19 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe
class Biolog_pl(BasicNewsRecipe):
    title = u'Biolog.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds=True
    __author__ = 'fenuks'
    description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
    category = 'biology'
    language = 'pl'
    cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
    no_stylesheets = True
    #keeps_only_tags=[dict(id='main')]
    remove_tags_before=dict(id='main')
    remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
    remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
    feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]
recipes/birmingham_post.recipe (new file, 44 lines)
@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Birmingham post'
    description = 'News for Birmingham UK'
    timefmt = ''
    __author__ = 'Dave Asbury'
    cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
    oldest_article = 1
    max_articles_per_feed = 20
    remove_empty_feeds = True
    remove_javascript = True
    auto_cleanup = True
    language = 'en_GB'

    masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'

    keep_only_tags = [
        #dict(name='h1',attrs={'id' : 'article-headline'}),
        #dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
        #dict(name='p')
        #dict(attrs={'id' : 'three-col'})
    ]
    remove_tags = [
        # dict(name='div',attrs={'class' : 'span-33 last header-links'})
    ]
    feeds = [
        #(u'News',u'http://www.birminghampost.net/news/rss.xml'),
        (u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
        (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
        (u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
        (u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
    ]
    extra_css = '''
        body {font: sans-serif medium;}
        h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
        h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
        span{ font-size:9.5px; font-weight:bold;font-style:italic}
        p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
    '''
recipes/computerworld_pl.recipe (new file, 22 lines)
@@ -0,0 +1,22 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe
class Computerworld_pl(BasicNewsRecipe):
    title = u'Computerworld.pl'
    __author__ = 'fenuks'
    description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
    category = 'IT'
    language = 'pl'
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100
    keep_only_tags=[dict(name='div', attrs={'id':'s'})]
    remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
    remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
    feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.computerworld.pl/')
        cover=soup.find(name='img', attrs={'class':'prawo'})
        self.cover_url=cover['src']
        return getattr(self, 'cover_url', self.cover_url)
@@ -7,6 +7,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'

    __author__ = 'Dave Asbury'
    #last update 21/12/11
    # greyscale code by Starson
    cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
    no_stylesheets = True
@@ -31,8 +32,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
        dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
        dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
        dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
        dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
    ]
        dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
        dict(name='li',attrs={'class' : 'thumb'})
    ]

    feeds = [
        (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
@@ -48,4 +50,3 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup
recipes/datasport.recipe (new file, 15 lines)
@@ -0,0 +1,15 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian soccer news website - v1.00 (17, December 2011)'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1324114272(BasicNewsRecipe):
    title = u'Datasport'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = True

    feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]
recipes/dziennik_pl.recipe (new file, 58 lines)
@@ -0,0 +1,58 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe
import re
class Dziennik_pl(BasicNewsRecipe):
    title = u'Dziennik.pl'
    __author__ = 'fenuks'
    description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
    category = 'newspaper'
    language = 'pl'
    cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript=True
    remove_empty_feeds=True
    preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
    keep_only_tags=[dict(id='article')]
    remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
    feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
             (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
             (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
             (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
             (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
             (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
             (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
             (u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
             (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
             (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
             (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
             (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]

    def append_page(self, soup, appendtag):
        tag=soup.find('a', attrs={'class':'page_next'})
        if tag:
            appendtag.find('div', attrs={'class':'article_paginator'}).extract()
            while tag:
                soup2= self.index_to_soup(tag['href'])
                tag=soup2.find('a', attrs={'class':'page_next'})
                if not tag:
                    for r in appendtag.findAll('div', attrs={'class':'art_src'}):
                        r.extract()
                pagetext = soup2.find(name='div', attrs={'class':'article_body'})
                for dictionary in self.remove_tags:
                    v=pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
                    for delete in v:
                        delete.extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            if appendtag.find('div', attrs={'class':'article_paginator'}):
                appendtag.find('div', attrs={'class':'article_paginator'}).extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup
recipes/echo_online.recipe (new file, 47 lines)
@@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
'''
Fetch echo-online.de
'''

from calibre.web.feeds.recipes import BasicNewsRecipe
class Echo_Online(BasicNewsRecipe):
    title = u'Echo Online'
    description = '-Echo Online-'
    publisher = 'Echo Online GmbH'
    category = 'News, Germany'
    __author__ = 'Armin Geller' # 2011-12-17
    language = 'de'
    lang = 'de-DE'
    encoding = 'iso-8859-1'
    timefmt = ' [%a, %d %b %Y]'

    oldest_article = 7
    max_articles_per_feed = 2
    no_stylesheets = True
    auto_cleanup = True
    remove_javascript = True

    feeds = [
        (u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
        (u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
        (u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
        (u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
        (u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
        (u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
        (u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
        (u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
        (u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
        (u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
        (u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
    ]

    def print_version(self, url):
        return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'

    remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
    auto_cleanup_keep = '//div[@class="bild_gross w270"]'

    # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-ash2/41801_145340745513489_893927_n.jpg' # 2011-12-16 AGe
    cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif' # 2011-12-16 AGe
recipes/elet_es_irodalom.recipe (new file, 48 lines)
@@ -0,0 +1,48 @@
################################################################################
#Description: http://es.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2010.12.01. - V1.0
################################################################################

from calibre.web.feeds.recipes import BasicNewsRecipe

class elet_es_irodalom(BasicNewsRecipe):
    title = u'Elet es Irodalom'
    __author__ = 'Bigpapa'
    oldest_article = 7
    max_articles_per_feed = 20 # maximum number of articles per feed stored in the e-book
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'iso-8859-2'
    category = 'Cikkek'
    language = 'hu'
    publication_type = 'newsportal'
    extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '

    keep_only_tags = [
        dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
    ]

    remove_tags = [
        dict(name='a', attrs={'target':['_TOP']}),
        dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
    ]

    feeds = [
        (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
        (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
        (u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
        (u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
        (u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
        (u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
        (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
        (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
        (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
    ]
recipes/emuzica_pl.recipe (new file, 16 lines)
@@ -0,0 +1,16 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe
class eMuzyka(BasicNewsRecipe):
    title = u'eMuzyka'
    __author__ = 'fenuks'
    description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
    category = 'music'
    language = 'pl'
    cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
    remove_tags=[dict(name='span', attrs={'id':'date'})]
    feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
recipes/fisco_oggi.recipe (new file, 18 lines)
@@ -0,0 +1,18 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Website of the Italian Government Income Agency (about revenue, taxation, taxes) - v1.00 (17, December 2011)'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1324112023(BasicNewsRecipe):
    title = u'Fisco Oggi'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_javascript = True
    no_stylesheets = True

    feeds = [(u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]
@@ -1,57 +1,68 @@
# -*- coding: utf-8 -*-
import re

from calibre.web.feeds.news import BasicNewsRecipe

class Focus_pl(BasicNewsRecipe):
    title = u'Focus.pl'
    oldest_article = 15
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    language = 'pl'
    description ='polish scientific monthly magazine'
class FocusRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1

    title = u'Focus'
    publisher = u'Gruner + Jahr Polska'
    category = u'News'
    description = u'Newspaper'
    category='magazine'
    cover_url=''
    remove_empty_feeds= True
    no_stylesheets=True
    remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
    remove_tags_after=dict(name='div', attrs={'class':'clear'})
    feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
             (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
             (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
             (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
             (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
             (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
             (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
             (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
             (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    # Seems to work best, but YMMV
    simultaneous_downloads = 5

    r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    keep_only_tags =[]
    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))

    remove_tags =[]
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
        h1{text-align: left;}
        h2{font-size: medium; font-weight: bold;}
        p.lead {font-weight: bold; text-align: left;}
        .authordate {font-size: small; color: #696969;}
        .fot{font-size: x-small; color: #666666;}
    '''

    ]
    feeds = [
        ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
        ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
        ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
        ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
        ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
        ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
        ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
    ]

    def skip_ad_pages(self, soup):
        tag=soup.find(name='a')
        if tag:
            new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
            return new_soup

    def append_page(self, appendtag):
        tag=appendtag.find(name='div', attrs={'class':'arrows'})
        if tag:
            nexturl='http://www.focus.pl/'+tag.a['href']
            for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
                rem.extract()
            while nexturl:
                soup2=self.index_to_soup(nexturl)
                nexturl=None
                pagetext=soup2.find(name='div', attrs={'class':'txt'})
                tag=pagetext.find(name='div', attrs={'class':'arrows'})
                for r in tag.findAll(name='a'):
                    if u'Następne' in r.string:
                        nexturl='http://www.focus.pl/'+r['href']
                for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
                    rem.extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
        if ('advertisement' in soup.find('title').string.lower()):
            href = soup.find('a').get('href')
            return self.index_to_soup(href, raw=True)
        else:
            return None

    def get_cover_url(self):
        soup=self.index_to_soup('http://www.focus.pl/magazyn/')
@@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
        self.cover_url='http://www.focus.pl/' + tag.a['href']
        return getattr(self, 'cover_url', self.cover_url)

    def preprocess_html(self, soup):
        self.append_page(soup.body)
        return soup
    def print_version(self, url):
        if url.count ('focus.pl.feedsportal.com'):
            u = url.find('focus0Bpl')
            u = 'http://www.focus.pl/' + url[u + 11:]
            u = u.replace('0C', '/')
            u = u.replace('A', '')
            u = u.replace ('0E','-')
            u = u.replace('/nc/1//story01.htm', '/do-druku/1')
        else:
            u = url.replace('/nc/1','/do-druku/1')
        return u
@@ -1,4 +1,3 @@

from calibre.web.feeds.news import BasicNewsRecipe

class GlasgowHerald(BasicNewsRecipe):
@@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
    language = 'en_GB'

    __author__ = 'Kovid Goyal'
    use_embedded_content = False

    keep_only_tags = [dict(attrs={'class':'article'})]
    remove_tags = [
        dict(id=['pic-nav']),
        dict(attrs={'class':['comments-top']})
    ]
    no_stylesheets = True
    auto_cleanup = True

    #keep_only_tags = [dict(attrs={'class':'article'})]
    #remove_tags = [
        #dict(id=['pic-nav']),
        #dict(attrs={'class':['comments-top']})
    #]

    feeds = [
@@ -25,5 +28,4 @@ class GlasgowHerald(BasicNewsRecipe):
        (u'Arts & Entertainment',
            u'http://www.heraldscotland.com/cmlink/1.768',),
        (u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
@@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse
import re

class HackerNews(BasicNewsRecipe):
    title = 'Hacker News'
    __author__ = 'Tom Scholl'
class HNWithCommentsLink(BasicNewsRecipe):
    title = 'HN With Comments Link'
    __author__ = 'Tom Scholl & David Kerschner'
    description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
    publisher = 'Y Combinator'
    category = 'news, programming, it, technology'
@@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
        body = body + comments
        return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'

    def parse_feeds(self):
        a = super(HNWithCommentsLink, self).parse_feeds()
        self.hn_articles = a[0].articles
        return a

    def get_obfuscated_article(self, url):
        if url.startswith('http://news.ycombinator.com'):
            content = self.get_hn_content(url)
@@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
        else:
            content = self.get_readable_content(url)

        article = 0
        for a in self.hn_articles:
            if a.url == url:
                article = a

        content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'

        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
        self.temp_files[-1].write(content)
        self.temp_files[-1].close()
@@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
import urllib, re

class HindustanTimes(BasicNewsRecipe):
    title = u'Hindustan Times'
@@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
        'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
    ]

    def get_article_url(self, article):
        '''
        HT uses a variant of the feedportal RSS ad display mechanism
        '''
        try:
            s = article.summary
            return urllib.unquote(
                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
        except:
            pass
        url = BasicNewsRecipe.get_article_url(self, article)
        res = self.browser.open_novisit(url)
        url = res.geturl().split('/')[-2]
        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
                    'www.'}
        for k, v in encoding.iteritems():
            url = url.replace(k, v)
        return url
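The two-character escape table above undoes feedportal's URL obfuscation by plain string substitution. Below is a standalone sketch of the same decoding; the sample token is invented for illustration and is not a real feed URL:

    # Standalone sketch of the feedportal-style decoding used in get_article_url
    # above; the sample token is made up for illustration.
    encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S': 'www.'}

    def decode_feedportal(token):
        for k, v in encoding.items():
            token = token.replace(k, v)
        return token

    print(decode_feedportal('0Lwww0Bhindustantimes0N0Cnews'))
    # -> http://www.hindustantimes.com/news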
@@ -1,44 +1,58 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
################################################################################
#Description: http://hvg.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.20. - V1.1
################################################################################

class HVG(BasicNewsRecipe):
    title = 'HVG.HU'
    __author__ = u'István Papp'
    description = u'Friss hírek a HVG-től'
    timefmt = ' [%Y. %b. %d., %a.]'
    oldest_article = 4
    language = 'hu'
from calibre.web.feeds.news import BasicNewsRecipe

    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'HVG Online'
    category = u'news, hírek, hvg'
    extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    remove_tags_before = dict(id='pg-content')
    remove_javascript = True
    remove_empty_feeds = True
class hvg(BasicNewsRecipe):
    title = u'HVG'
    __author__ = 'Bigpapa'
    language = 'hu'
    oldest_article = 5 # how many days old the oldest fetched article may be
    max_articles_per_feed = 5 # maximum number of articles per feed stored in the e-book
    no_stylesheets = True
    encoding = 'utf8'
    extra_css = ' h2 { font:bold 28px} '

    feeds = [
        (u'Itthon', u'http://hvg.hu/rss/itthon')
        ,(u'Világ', u'http://hvg.hu/rss/vilag')
        ,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
        ,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
        ,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
        ,(u'Karrier', u'http://hvg.hu/rss/karrier')
        ,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
        ,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
        ,(u'Kultúra', u'http://hvg.hu/rss/kultura')
        ,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
        ,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
        ,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
        ,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
        ,(u'Sport', u'http://hvg.hu/rss/sport')
    ]
    remove_attributes = ['style','font', 'href']

    def print_version(self, url):
        return url.replace ('#rss', '/print')
    keep_only_tags = [
        dict(name='div', attrs={'id':['pg-content']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
        dict(name='table', attrs={'class':['banner2', 'monocle']}),
        dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
        dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
        dict(name='h3', attrs={'class':['hthree']}),
        dict(name='ul', attrs={'class':['defaultul']}),
        dict(name='form', attrs={'id':['commentForm']}),
        dict(name='h6', attrs={'class':['hthree']}),
        dict(name='h6', attrs={'class':['more2']}),
        dict(name='img', attrs={'class':['framed']}),
        dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
    ]

    feeds = [
        # (u'\xd6sszes', 'http://hvg.hu/rss'),
        (u'Itthon', 'http://hvg.hu/rss/itthon'),
        (u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
        (u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
        (u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
        (u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
        (u'Karrier', 'http://hvg.hu/rss/karrier'),
        (u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
        (u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
        (u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
        (u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
        (u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
        (u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
        (u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
        (u'Sport', 'http://hvg.hu/rss/sport')
    ]
New binary files (not shown):
recipes/icons/biolog_pl.png (1.2 KiB)
recipes/icons/computerworld_pl.png (373 B)
recipes/icons/dziennik_pl.png (481 B)
recipes/icons/kosmonauta_pl.png (1.2 KiB)
recipes/icons/mlody_technik_pl.png (2.1 KiB)
recipes/icons/moneynews.png (914 B)
@@ -109,7 +109,6 @@ class TheIndependentNew(BasicNewsRecipe):
            picdiv = soup.find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article,picdiv['src'])

    def preprocess_html(self, soup):

@@ -273,12 +272,15 @@ class TheIndependentNew(BasicNewsRecipe):

    def _insertRatingStars(self,soup,item):
        if item.contents is None:
        if item.contents is None or len(item.contents) < 1:
            return
        rating = item.contents[0]
        if not rating.isdigit():
            return None
        rating = int(item.contents[0])

        try:
            rating = float(item.contents[0])
        except:
            print 'Could not convert decimal rating to star: malformatted float.'
            return
        for i in range(1,6):
            star = Tag(soup,'img')
            if i <= rating:
recipes/kosmonauta_pl.recipe (new file, 14 lines)
@@ -0,0 +1,14 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe
class Kosmonauta(BasicNewsRecipe):
    title = u'Kosmonauta.net'
    __author__ = 'fenuks'
    description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
    category = 'astronomy'
    language = 'pl'
    cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html')]
@@ -10,6 +10,10 @@ __MakePeriodical__ = True
__UseChineseTitle__ = False
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) This is used to disable premium content (Default: False)
@@ -24,12 +28,15 @@ __Date__ = ''

'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
            from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
            download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@@ -53,6 +60,7 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''

from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
@@ -60,11 +68,15 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
    if __Region__ == 'Hong Kong':
        title = 'Ming Pao - Hong Kong'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = 'Ming Pao - Hong Kong'
        description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
        category = 'Chinese, News, Hong Kong'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
@@ -109,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
            lambda match: "</b>")
        ]
    elif __Region__ == 'Vancouver':
        title = 'Ming Pao - Vancouver'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
        else:
            title = 'Ming Pao - Vancouver'
        description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
        category = 'Chinese, News, Vancouver'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -127,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
            lambda match: ''),
        ]
    elif __Region__ == 'Toronto':
        title = 'Ming Pao - Toronto'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = 'Ming Pao - Toronto'
        description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
        category = 'Chinese, News, Toronto'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -161,9 +179,9 @@ class MPRecipe(BasicNewsRecipe):
    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        if __Region__ == 'Hong Kong':
            # convert UTC to local hk time - at HKT 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
            # convert UTC to local hk time - at HKT 4.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
        elif __Region__ == 'Vancouver':
            # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
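The date arithmetic above shifts UTC to local time and then subtracts the hour at which the day's news goes online, so the fetch date only rolls over once the upload is complete. A small worked sketch of the Hong Kong case, with a sample time chosen for illustration:

    # Worked sketch of the rollover above: UTC -> HKT (+8h), then -4.5h so the
    # date only advances after the 4:30am HKT news upload. The sample time is
    # invented for illustration.
    import datetime

    dt_utc = datetime.datetime(2011, 12, 22, 20, 0)   # 20:00 UTC = 04:00 HKT next day
    dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
    print(dt_local.strftime('%Y-%m-%d'))              # 2011-12-22: still before the 4:30am cutoff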
@@ -186,6 +204,18 @@ class MPRecipe(BasicNewsRecipe):
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ <> '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ <> '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        if __Date__ <> '':
            return __Date__[6:8]
@@ -237,7 +267,7 @@ class MPRecipe(BasicNewsRecipe):
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
@@ -274,7 +304,7 @@ class MPRecipe(BasicNewsRecipe):
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
            #    articles = self.parse_section(url)
@@ -291,7 +321,7 @@ class MPRecipe(BasicNewsRecipe):
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
@@ -299,7 +329,7 @@ class MPRecipe(BasicNewsRecipe):
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
@@ -379,7 +409,7 @@ class MPRecipe(BasicNewsRecipe):
                title = self.tag_to_string(i)
                url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
                if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                    try:
                        br.open_novisit(url)
                        url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                        current_articles.append({'title': title, 'url': url, 'description': ''})
@@ -406,7 +436,7 @@ class MPRecipe(BasicNewsRecipe):
                    included_urls.append(url)
        current_articles.reverse()
        return current_articles

    # parse from www.mingpaovan.com
    def parse_section3(self, url, baseUrl):
        self.get_fetchdate()
@@ -528,7 +558,7 @@ class MPRecipe(BasicNewsRecipe):
                    photo = photo.replace('class="photo"', '')
                    new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                new_html = new_raw_html + '</body></html>'
        else:
            # .txt based file
            splitter = re.compile(r'\n') # Match non-digits
            new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
@@ -591,23 +621,23 @@ class MPRecipe(BasicNewsRecipe):
            #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
            if __HiResImg__ == True:
                # TODO: add a _ in front of an image url
                if url.rfind('news.mingpao.com') > -1:
                    imglist = re.findall('src="?.*?jpg"', new_html)
                    br = mechanize.Browser()
                    br.set_handle_redirect(False)
                    for img in imglist:
                        gifimg = img.replace('jpg"', 'gif"')
                        try:
                            br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                            new_html = new_html.replace(img, gifimg)
                        except:
                            # find the location of the first _
                            pos = img.find('_')
                            if pos > -1:
                                # if found, insert _ after the first _
                                newimg = img[0:pos] + '_' + img[pos:]
                                new_html = new_html.replace(img, newimg)
                            else:
                                # if not found, insert _ after "
                                new_html = new_html.replace(img[1:], '"_' + img[1:])
                elif url.rfind('life.mingpao.com') > -1:
@@ -644,7 +674,7 @@ class MPRecipe(BasicNewsRecipe):
                            #print 'Use hi-res img', newimg
                            new_html = new_html.replace(img, newimg)
        return new_html

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
@@ -653,78 +683,154 @@ class MPRecipe(BasicNewsRecipe):
        for item in soup.findAll(stype=True):
            del item['absmiddle']
        return soup

    def populate_article_metadata(self, article, soup, first):
        # thumbnails shouldn't be available if using hi-res images
        if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
            img = soup.find('img')
            if img is not None:
                self.add_toc_thumbnail(article, img['src'])

        try:
            if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
                # look for content
                articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        if articlebody:
                            # the text may or may not be enclosed in <p></p> tag
                            paras = articlebody.findAll('p')
                            if not paras:
                                paras = articlebody
                            textFound = False
                            for p in paras:
                                if not textFound:
                                    summary_candidate = self.tag_to_string(p).strip()
                                    summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
                                    if len(summary_candidate) > 0:
                                        article.summary = article.text_summary = summary_candidate
                                        textFound = True
            else:
                # display a simple text
                #article.summary = article.text_summary = u'\u66f4\u591a......'
                # display word counts
                counts = 0
                articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        # the text may or may not be enclosed in <p></p> tag
                        paras = articlebody.findAll('p')
                        if not paras:
                            paras = articlebody
                        for p in paras:
                            summary_candidate = self.tag_to_string(p).strip()
                            counts += len(summary_candidate)
                        article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
        except:
            self.log("Error creating article descriptions")
            return
    # override from the one in version 0.8.31
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            if __Region__ == 'Hong Kong':
                title = u'\u660e\u5831 (\u9999\u6e2f)'
            elif __Region__ == 'Vancouver':
                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
            elif __Region__ == 'Toronto':
                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if __MakePeriodical__ == False:
            title = self.short_title()
        # change 1: allow our own flag to tell if a periodical is to be generated
        # also use a custom date instead of the current time
        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)
        # end of change 1
        # change 2: __appname__ replaced by newspaper publisher
        __appname__ = self.publisher
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        # change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
        if __MakePeriodical__ == True:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        else:
            mi.publication_type = self.publication_type+':'+self.short_title()
        #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        # change 4: in the following, all the nowf() are changed to adjusted time
        # This one doesn't matter
        mi.timestamp = nowf()
        # change 5: skip listing the articles
        #article_titles, aseen = [], set()
        #for f in feeds:
        #    for a in f:
        #        if a.title and a.title not in aseen:
        #            aseen.add(a.title)
        #            article_titles.append(force_unicode(a.title, 'utf-8'))

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))
        #mi.comments = self.description
        #if not isinstance(mi.comments, unicode):
        #    mi.comments = mi.comments.decode('utf-8', 'replace')
        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
        #    '\n\n'.join(article_titles))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
@ -739,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth, description=desc)
|
||||
parent.add_item('%sindex.html'%adir, None,
|
||||
a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth,
|
||||
description=desc, toc_thumbnail=tt)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
@ -762,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, self.publisher, prefix=prefix,
|
||||
a.orig_url, __appname__, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
@ -785,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
@ -799,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
||||
|
@@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) Set it to True to include premium content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish to use hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''

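# Usage sketch (editor's note, not part of the original recipe): to fetch a
# back issue, set the override above to a YYYYMMDD string, e.g.
#   __Date__ = '20111218'
# and leave it as '' to fetch the current issue as computed by get_dtlocal().
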
'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
            from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
            download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does not start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
            provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''

import os, datetime, re
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang

# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
    if __Region__ == 'Hong Kong':
        title = 'Ming Pao - Hong Kong'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = 'Ming Pao - Hong Kong'
        description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
        category = 'Chinese, News, Hong Kong'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
        masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
        keep_only_tags = [dict(name='h1'),
                          dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                          dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                          dict(attrs={'class':['heading']}), # for heading from txt
                          dict(attrs={'id':['newscontent']}), # entertainment and column page content
                          dict(attrs={'id':['newscontent01','newscontent02']}),
                          dict(attrs={'class':['content']}), # for content from txt
                          dict(attrs={'class':['photo']}),
                          dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
                          dict(attrs={'class':['images']}) # for images from txt
                          ]
        if __KeepImages__:
            remove_tags = [dict(name='style'),
@@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
                           lambda match: "</b>")
                          ]
    elif __Region__ == 'Vancouver':
        title = 'Ming Pao - Vancouver'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
        else:
            title = 'Ming Pao - Vancouver'
        description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
        category = 'Chinese, News, Vancouver'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
                           lambda match: ''),
                          ]
    elif __Region__ == 'Toronto':
        title = 'Ming Pao - Toronto'
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = 'Ming Pao - Toronto'
        description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
        category = 'Chinese, News, Toronto'
        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
    conversion_options = {'linearize_tables':True}
    timefmt = ''

    def image_url_processor(cls, baseurl, url):
        # trick: break the url at the first occurrence of a digit, add an additional
        # '_' at the front
        # not working, may need to move this to preprocess_html() method
        # minIdx = 10000
        # i0 = url.find('0')
        # if i0 >= 0 and i0 < minIdx:
        #     minIdx = i0
        # i1 = url.find('1')
        # if i1 >= 0 and i1 < minIdx:
        #     minIdx = i1
        # i2 = url.find('2')
        # if i2 >= 0 and i2 < minIdx:
        #     minIdx = i2
        # i3 = url.find('3')
        # if i3 >= 0 and i3 < minIdx:
        #     minIdx = i3
        # i4 = url.find('4')
        # if i4 >= 0 and i4 < minIdx:
        #     minIdx = i4
        # i5 = url.find('5')
        # if i5 >= 0 and i5 < minIdx:
        #     minIdx = i5
        # i6 = url.find('6')
        # if i6 >= 0 and i6 < minIdx:
        #     minIdx = i6
        # i7 = url.find('7')
        # if i7 >= 0 and i7 < minIdx:
        #     minIdx = i7
        # i8 = url.find('8')
        # if i8 >= 0 and i8 < minIdx:
        #     minIdx = i8
        # i9 = url.find('9')
        # if i9 >= 0 and i9 < minIdx:
        #     minIdx = i9
        return url

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        if __Region__ == 'Hong Kong':
            # convert UTC to local hk time - at HKT 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
            # convert UTC to local hk time - at HKT 4.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
        elif __Region__ == 'Vancouver':
            # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
            dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")
        if __Date__ <> '':
            return __Date__
        else:
            return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")
        if __Date__ <> '':
            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchyear(self):
        if __Date__ <> '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")

    def get_fetchmonth(self):
        if __Date__ <> '':
            return __Date__[4:6]
        else:
            return self.get_dtlocal().strftime("%m")

    def get_fetchday(self):
        return self.get_dtlocal().strftime("%d")
        if __Date__ <> '':
            return __Date__[6:8]
        else:
            return self.get_dtlocal().strftime("%d")

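    # Illustrative sketch (editor's note, not part of the original recipe):
    # the fetch date above is derived by shifting UTC to the region's clock
    # and then subtracting the hour at which the day's edition is assumed
    # complete, so before that hour the previous day's paper is fetched.
    # For Hong Kong (UTC+8, edition ready at 4:30am) this is equivalent to:
    #   import datetime
    #   utc = datetime.datetime.utcnow()
    #   hk_clock = utc + datetime.timedelta(hours=8)
    #   fetch_date = (hk_clock - datetime.timedelta(hours=4, minutes=30)).strftime('%Y%m%d')
    # timedelta(8.0/24) in the code above is the same 8-hour shift expressed
    # in days.
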
    def get_cover_url(self):
        if __Region__ == 'Hong Kong':
@@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
                               (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
                               (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                               (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                               (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
                               (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
                articles = self.parse_section2(url, keystr)
                               (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                              ]:
                if __InclPremium__ == True:
                    articles = self.parse_section2_txt(url, keystr)
                else:
                    articles = self.parse_section2(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
@@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
        else:
            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - editorial
            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
            if ed_articles:
                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
            #if ed_articles:
            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):

            # special - finance
            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            if fin_articles:
                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            #if fin_articles:
            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
                articles = self.parse_section(url)
            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
            #    articles = self.parse_section(url)
            #    if articles:
            #        feeds.append((title, articles))

            # special - entertainment
            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            if ent_articles:
                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            #if ent_articles:
            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                      ]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
                                          ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
                if articles:
                    feeds.append((title, articles))


            # special - columns
            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
            if col_articles:
                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
        elif __Region__ == 'Vancouver':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
                title = self.tag_to_string(a)
                url = a.get('href', False)
                url = 'http://news.mingpao.com/' + dateStr + '/' + url
                # replace the url to the print-friendly version
                if __ParsePFF__ == True:
                    if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
                        url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                        url = re.sub('%2F.*%2F', '/', url)
                        title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
                        url = url.replace('%2Etxt', '_print.htm')
                        url = url.replace('%5F', '_')
                    else:
                        url = url.replace('.htm', '_print.htm')
                if url not in included_urls and url.rfind('Redirect') == -1:
                    current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                    included_urls.append(url)
@@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
@@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                try:
                    br.open_novisit(url)
                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                    current_articles.append({'title': title, 'url': url, 'description': ''})
                    included_urls.append(url)
                except:
                    print 'skipping a premium article'
        current_articles.reverse()
        return current_articles

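    # Illustrative sketch (editor's note, not part of the original recipe):
    # parse_section2() above detects premium articles by probing each URL
    # with redirects disabled; the site answers paid content with a redirect,
    # which mechanize then raises as an error. A minimal standalone version
    # of the same probe (article_url is a hypothetical name):
    #   import mechanize
    #   br = mechanize.Browser()
    #   br.set_handle_redirect(False)
    #   try:
    #       br.open_novisit(article_url)
    #       is_free = True
    #   except Exception:
    #       is_free = False   # redirected, treat as premium and skip
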
    # parse from text file of life.mingpao.com
    def parse_section2_txt(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
@@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
        current_articles.reverse()
        return current_articles

    # preprocess those .txt and javascript based files
    def preprocess_raw_html(self, raw_html, url):
        new_html = raw_html
        if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
            if url.rfind('_print.htm') <> -1:
                # javascript based file
                splitter = re.compile(r'\n')
                new_raw_html = '<html><head><title>Untitled</title></head>'
                new_raw_html = new_raw_html + '<body>'
                for item in splitter.split(raw_html):
                    if item.startswith('var heading1 ='):
                        heading = item.replace('var heading1 = \'', '')
                        heading = heading.replace('\'', '')
                        heading = heading.replace(';', '')
                        new_raw_html = new_raw_html + '<div class="heading">' + heading
                    if item.startswith('var heading2 ='):
                        heading = item.replace('var heading2 = \'', '')
                        heading = heading.replace('\'', '')
                        heading = heading.replace(';', '')
                        if heading <> '':
                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
                        else:
                            new_raw_html = new_raw_html + '</div>'
                    if item.startswith('var content ='):
                        content = item.replace("var content = ", '')
                        content = content.replace('\'', '')
                        content = content.replace(';', '')
                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
                    if item.startswith('var photocontent ='):
                        photo = item.replace('var photocontent = \'', '')
                        photo = photo.replace('\'', '')
                        photo = photo.replace(';', '')
                        photo = photo.replace('<tr>', '')
                        photo = photo.replace('<td>', '')
                        photo = photo.replace('</tr>', '')
                        photo = photo.replace('</td>', '<br>')
                        photo = photo.replace('class="photo"', '')
                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                new_html = new_raw_html + '</body></html>'
            else:
                # .txt based file
                splitter = re.compile(r'\n') # split on newlines
                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
                next_is_img_txt = False
                title_started = False
                title_break_reached = False
                met_article_start_char = False
                for item in splitter.split(raw_html):
                    item = item.strip()
                    # if title already reached but break between title and content not yet found, record title_break_reached
                    if title_started == True and title_break_reached == False and item == '':
                        title_break_reached = True
                    # if title reached and title_break_reached and met_article_start_char == False and item is not empty
                    # start content
                    elif title_started == True and title_break_reached == True and met_article_start_char == False:
                        if item <> '':
                            met_article_start_char = True
                            new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                            #if item.startswith(u'\u3010'):
                            #    met_article_start_char = True
                            #    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                    else:
                        if next_is_img_txt == False:
                            if item.startswith("=@"):
                                print 'skip movie link'
                            elif item.startswith("=?"):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
                            elif item.startswith('=='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[2:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
                            elif item.startswith('='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[1:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                            else:
                                if next_is_img_txt == False and met_article_start_char == False:
                                    if item <> '':
                                        if title_started == False:
                                            #print 'Title started at ', item
                                            new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
                                            title_started = True
                                        else:
                                            new_raw_html = new_raw_html + item + '\n'
                                    else:
                                        new_raw_html = new_raw_html + item + '<p>\n'
                        else:
                            next_is_img_txt = False
                            new_raw_html = new_raw_html + item + '\n'
                new_html = new_raw_html + '</div></body></html>'
        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
            if url.rfind('news.mingpao.com') > -1:
                imglist = re.findall('src="?.*?jpg"', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            new_html = new_html.replace(img, newimg)
                        else:
                            # if not found, insert _ after "
                            new_html = new_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
                imglist = re.findall('src=\'?.*?jpg\'', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                #print 'Img list: ', imglist, '\n'
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg\'', 'gif\'')
                    try:
                        gifurl = re.sub(r'dailynews.*txt', '', url)
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        new_html = new_html.replace(img, newimg)
                # repeat with src quoted by double quotes, for text parsed from src txt
                imglist = re.findall('src="?.*?jpg"', new_html)
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        #print 'url', url
                        pos = url.rfind('/')
                        gifurl = url[:pos+1]
                        #print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.find('"')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'Use hi-res img', newimg
                        new_html = new_html.replace(img, newimg)
        return new_html

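    # Illustrative sketch (editor's note, not part of the original recipe):
    # the hi-res handling above is probe-and-fallback. For every jpg image
    # reference it first tries the matching .gif next to the article; if that
    # open fails it rewrites the jpg name with a leading underscore, which is
    # how the site appears to name its high-resolution stills:
    #   img = 'src="photo1.jpg"'              # hypothetical regex match
    #   try:
    #       br.open_novisit(base_url + 'photo1.gif')
    #       img = img.replace('jpg"', 'gif"')
    #   except Exception:
    #       img = 'src="_photo1.jpg"'         # underscore = hi-res variant
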
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
@@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
            del item['absmiddle']
        return soup

    def populate_article_metadata(self, article, soup, first):
        # thumbnails shouldn't be available if using hi-res images
        if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
            img = soup.find('img')
            if img is not None:
                self.add_toc_thumbnail(article, img['src'])

        try:
            if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
                # look for content
                articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        if articlebody:
                            # the text may or may not be enclosed in <p></p> tag
                            paras = articlebody.findAll('p')
                            if not paras:
                                paras = articlebody
                            textFound = False
                            for p in paras:
                                if not textFound:
                                    summary_candidate = self.tag_to_string(p).strip()
                                    summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
                                    if len(summary_candidate) > 0:
                                        article.summary = article.text_summary = summary_candidate
                                        textFound = True
            else:
                # display a simple text
                #article.summary = article.text_summary = u'\u66f4\u591a......'
                # display word counts
                counts = 0
                articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div',attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        # the text may or may not be enclosed in <p></p> tag
                        paras = articlebody.findAll('p')
                        if not paras:
                            paras = articlebody
                        for p in paras:
                            summary_candidate = self.tag_to_string(p).strip()
                            counts += len(summary_candidate)
                article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
        except:
            self.log("Error creating article descriptions")
            return

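    # Illustrative note (editor's note, not part of the original recipe):
    # when __IncludeSummary__ is off, populate_article_metadata() above still
    # fills the Kindle article view with a character count. For example, a
    # 350-character story gets the summary u'\uff08350\u5b57\uff09', i.e.
    # "(350 characters)" in full-width punctuation, built by summing
    # tag_to_string() over the article body's <p> tags.
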
    # override from the one in version 0.8.31
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            if __Region__ == 'Hong Kong':
                title = u'\u660e\u5831 (\u9999\u6e2f)'
            elif __Region__ == 'Vancouver':
                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
            elif __Region__ == 'Toronto':
                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if __MakePeriodical__ == False:
            title = self.short_title()
        # change 1: allow our own flag to tell if a periodical is to be generated
        # also use a custom date instead of the current time
        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)
        # end of change 1
        # change 2: __appname__ replaced by newspaper publisher
        __appname__ = self.publisher
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        # change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
        if __MakePeriodical__ == True:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        else:
            mi.publication_type = self.publication_type+':'+self.short_title()
        #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        # change 4: in the following, all the nowf() are changed to adjusted time
        # This one doesn't matter
        mi.timestamp = nowf()
        # change 5: skip listing the articles
        #article_titles, aseen = [], set()
        #for f in feeds:
        #    for a in f:
        #        if a.title and a.title not in aseen:
        #            aseen.add(a.title)
        #            article_titles.append(force_unicode(a.title, 'utf-8'))

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))
        #mi.comments = self.description
        #if not isinstance(mi.comments, unicode):
        #    mi.comments = mi.comments.decode('utf-8', 'replace')
        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
        #        '\n\n'.join(article_titles))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
@@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                            desc = None
                        else:
                            desc = self.description_limiter(desc)
                        tt = a.toc_thumbnail if a.toc_thumbnail else None
                        entries.append('%sindex.html'%adir)
                        po = self.play_order_map.get(entries[-1], None)
                        if po is None:
                            self.play_order_counter += 1
                            po = self.play_order_counter
                        parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                play_order=po, author=auth, description=desc)
                        parent.add_item('%sindex.html'%adir, None,
                                a.title if a.title else _('Untitled Article'),
                                play_order=po, author=auth,
                                description=desc, toc_thumbnail=tt)
                        last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                        for sp in a.sub_pages:
                            prefix = os.path.commonprefix([opf_path, sp])
@@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, self.publisher, prefix=prefix,
                                            a.orig_url, __appname__, prefix=prefix,
                                            center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
@@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
                        f.title, play_order=po, description=desc, author=auth))

        else:
            entries.append('feed_%d/index.html'%0)
@@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)

@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
|
||||
# Region - Hong Kong, Vancouver, Toronto
|
||||
__Region__ = 'Vancouver'
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
# please replace the following "True" with "False". (Default: True)
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to true if your device supports display of CJK titles
|
||||
# Turn below to True if your device supports display of CJK titles (Default: False)
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images
|
||||
# Set it to False if you want to skip images (Default: True)
|
||||
__KeepImages__ = True
|
||||
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
|
||||
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
|
||||
__IncludeSummary__ = False
|
||||
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
|
||||
__IncludeThumbnails__ = True
|
||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
|
||||
__UseLife__ = True
|
||||
# (HK only) It is to disable premium content (Default: False)
|
||||
__InclPremium__ = False
|
||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
|
||||
__ParsePFF__ = True
|
||||
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
||||
__HiResImg__ = False
|
||||
# Override the date returned by the program if specifying a YYYYMMDD below
|
||||
__Date__ = ''
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
||||
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
||||
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
|
||||
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
|
||||
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
|
||||
2011/10/19: fix a bug in txt source parsing
|
||||
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
||||
2011/10/04: option to get hi-res photos for the articles
|
||||
2011/09/21: fetching "column" section is made optional.
|
||||
2011/09/18: parse "column" section stuff from source text file directly.
|
||||
2011/09/07: disable "column" section as it is no longer offered free.
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
provide options to remove all images in the file
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
@ -37,30 +60,38 @@ Change Log:
|
||||
2010/10/31: skip repeated articles in section pages
|
||||
'''
|
||||
|
||||
import os, datetime, re
|
||||
from calibre.utils.date import now as nowf
|
||||
import os, datetime, re, mechanize
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
# MAIN CLASS
|
||||
class MPRecipe(BasicNewsRecipe):
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'class':['heading']}), # for heading from txt
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['content']}), # for content from txt
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
|
||||
dict(attrs={'class':['images']}) # for images from txt
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='style'),
|
||||
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: "</b>")
|
||||
]
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = 'Ming Pao - Vancouver'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||
else:
|
||||
title = 'Ming Pao - Vancouver'
|
||||
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||
category = 'Chinese, News, Vancouver'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: ''),
|
||||
]
|
||||
elif __Region__ == 'Toronto':
|
||||
title = 'Ming Pao - Toronto'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||
else:
|
||||
title = 'Ming Pao - Toronto'
|
||||
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||
category = 'Chinese, News, Toronto'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
# minIdx = 10000
|
||||
# i0 = url.find('0')
|
||||
# if i0 >= 0 and i0 < minIdx:
|
||||
# minIdx = i0
|
||||
# i1 = url.find('1')
|
||||
# if i1 >= 0 and i1 < minIdx:
|
||||
# minIdx = i1
|
||||
# i2 = url.find('2')
|
||||
# if i2 >= 0 and i2 < minIdx:
|
||||
# minIdx = i2
|
||||
# i3 = url.find('3')
|
||||
# if i3 >= 0 and i0 < minIdx:
|
||||
# minIdx = i3
|
||||
# i4 = url.find('4')
|
||||
# if i4 >= 0 and i4 < minIdx:
|
||||
# minIdx = i4
|
||||
# i5 = url.find('5')
|
||||
# if i5 >= 0 and i5 < minIdx:
|
||||
# minIdx = i5
|
||||
# i6 = url.find('6')
|
||||
# if i6 >= 0 and i6 < minIdx:
|
||||
# minIdx = i6
|
||||
# i7 = url.find('7')
|
||||
# if i7 >= 0 and i7 < minIdx:
|
||||
# minIdx = i7
|
||||
# i8 = url.find('8')
|
||||
# if i8 >= 0 and i8 < minIdx:
|
||||
# minIdx = i8
|
||||
# i9 = url.find('9')
|
||||
# if i9 >= 0 and i9 < minIdx:
|
||||
# minIdx = i9
|
||||
return url
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
if __Region__ == 'Hong Kong':
|
||||
# convert UTC to local hk time - at HKT 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
|
||||
# convert UTC to local hk time - at HKT 4.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
|
||||
elif __Region__ == 'Vancouver':
|
||||
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
|
||||
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
|
||||
return dt_local
|
||||
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchyear(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y")
|
||||
|
||||
def get_fetchmonth(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[4:6]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%m")
|
||||
|
||||
def get_fetchday(self):
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
if __Region__ == 'Hong Kong':
|
||||
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
|
||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
if __InclPremium__ == True:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
else:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
else:
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - editorial
            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalmr')
            if ed_articles:
                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalmr')
            #if ed_articles:
            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):

            # special - finance
            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            if fin_articles:
                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            #if fin_articles:
            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
                articles = self.parse_section(url)
            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
            #    articles = self.parse_section(url)
            #    if articles:
            #        feeds.append((title, articles))

            # special - entertainment
            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            if ent_articles:
                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            #if ent_articles:
            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
                                       ]:
                articles = self.parse_section2_txt(url, keystr)
                if articles:
                    feeds.append((title, articles))

            if __InclPremium__ == True:
                # parse column section articles directly from .txt files
                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=ncolumn', 'ncl')
                                           ]:
                    articles = self.parse_section2_txt(url, keystr)
                    if articles:
                        feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
                if articles:
                    feeds.append((title, articles))


            # special - columns
            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=ncolumn')
            if col_articles:
                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
        elif __Region__ == 'Vancouver':
            for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                               (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
                title = self.tag_to_string(a)
                url = a.get('href', False)
                url = 'http://news.mingpao.com/' + dateStr + '/' + url
                # replace the url with the print-friendly version
                if __ParsePFF__ == True:
                    if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
                        url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
                        url = re.sub('%2F.*%2F', '/', url)
                        title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
                        url = url.replace('%2Etxt', '_print.htm')
                        url = url.replace('%5F', '_')
                    else:
                        url = url.replace('.htm', '_print.htm')
                if url not in included_urls and url.rfind('Redirect') == -1:
                    current_articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
                    included_urls.append(url)
@@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
@@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')  # use printed version of the article
                try:
                    br.open_novisit(url)
                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')  # use printed version of the article
                    current_articles.append({'title': title, 'url': url, 'description': ''})
                    included_urls.append(url)
                except:
                    print 'skipping a premium article'
        current_articles.reverse()
        return current_articles

    # parse from text file of life.mingpao.com
    def parse_section2_txt(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/')  # use printed version of the article
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
@@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
        current_articles.reverse()
        return current_articles
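
    # Note on the premium check in parse_section2 above: it relies on
    # br.set_handle_redirect(False), which makes the bundled mechanize browser
    # raise on the HTTP redirect that paywalled articles trigger, so a plain
    # open_novisit() doubles as a paywall probe. A minimal sketch of the same
    # idea (the URL is hypothetical):
    #
    #   br = mechanize.Browser()
    #   br.set_handle_redirect(False)
    #   try:
    #       br.open_novisit('http://life.mingpao.com/cfm/dailynews3a.cfm?File=example.txt')
    #       print 'free article, keep it'
    #   except:
    #       print 'redirected to the paywall, skip it'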
    # preprocess those .txt and javascript based files
    def preprocess_raw_html(self, raw_html, url):
        new_html = raw_html
        if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
            if url.rfind('_print.htm') <> -1:
                # javascript based file
                splitter = re.compile(r'\n')
                new_raw_html = '<html><head><title>Untitled</title></head>'
                new_raw_html = new_raw_html + '<body>'
                for item in splitter.split(raw_html):
                    if item.startswith('var heading1 ='):
                        heading = item.replace('var heading1 = \'', '')
                        heading = heading.replace('\'', '')
                        heading = heading.replace(';', '')
                        new_raw_html = new_raw_html + '<div class="heading">' + heading
                    if item.startswith('var heading2 ='):
                        heading = item.replace('var heading2 = \'', '')
                        heading = heading.replace('\'', '')
                        heading = heading.replace(';', '')
                        if heading <> '':
                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
                        else:
                            new_raw_html = new_raw_html + '</div>'
                    if item.startswith('var content ='):
                        content = item.replace("var content = ", '')
                        content = content.replace('\'', '')
                        content = content.replace(';', '')
                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
                    if item.startswith('var photocontent ='):
                        photo = item.replace('var photocontent = \'', '')
                        photo = photo.replace('\'', '')
                        photo = photo.replace(';', '')
                        photo = photo.replace('<tr>', '')
                        photo = photo.replace('<td>', '')
                        photo = photo.replace('</tr>', '')
                        photo = photo.replace('</td>', '<br>')
                        photo = photo.replace('class="photo"', '')
                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                new_html = new_raw_html + '</body></html>'
            else:
                # .txt based file
                splitter = re.compile(r'\n')  # split on line breaks
                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
                next_is_img_txt = False
                title_started = False
                title_break_reached = False
                met_article_start_char = False
                for item in splitter.split(raw_html):
                    item = item.strip()
                    # if the title is already reached but the break between title and content is not yet found, record title_break_reached
                    if title_started == True and title_break_reached == False and item == '':
                        title_break_reached = True
                    # if the title is reached, title_break_reached is set, met_article_start_char is still False
                    # and the item is not empty, start the content
                    elif title_started == True and title_break_reached == True and met_article_start_char == False:
                        if item <> '':
                            met_article_start_char = True
                            new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                        #if item.startswith(u'\u3010'):
                        #    met_article_start_char = True
                        #    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
                    else:
                        if next_is_img_txt == False:
                            if item.startswith("=@"):
                                print 'skip movie link'
                            elif item.startswith("=?"):
                                next_is_img_txt = True
                                new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
                            elif item.startswith('=='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[2:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
                            elif item.startswith('='):
                                next_is_img_txt = True
                                if False:
                                    # TODO: check existence of .gif first
                                    newimg = '_' + item[1:].strip() + '.jpg'
                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
                                else:
                                    new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
                            else:
                                if next_is_img_txt == False and met_article_start_char == False:
                                    if item <> '':
                                        if title_started == False:
                                            #print 'Title started at ', item
                                            new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
                                            title_started = True
                                        else:
                                            new_raw_html = new_raw_html + item + '\n'
                                    else:
                                        new_raw_html = new_raw_html + item + '<p>\n'
                        else:
                            next_is_img_txt = False
                            new_raw_html = new_raw_html + item + '\n'
                new_html = new_raw_html + '</div></body></html>'
        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
        if __HiResImg__ == True:
            # TODO: add a _ in front of an image url
            if url.rfind('news.mingpao.com') > -1:
                imglist = re.findall('src="?.*?jpg"', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                for img in imglist:
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        # find the location of the first _
                        pos = img.find('_')
                        if pos > -1:
                            # if found, insert _ after the first _
                            newimg = img[0:pos] + '_' + img[pos:]
                            new_html = new_html.replace(img, newimg)
                        else:
                            # if not found, insert _ after "
                            new_html = new_html.replace(img[1:], '"_' + img[1:])
            elif url.rfind('life.mingpao.com') > -1:
                imglist = re.findall('src=\'?.*?jpg\'', new_html)
                br = mechanize.Browser()
                br.set_handle_redirect(False)
                #print 'Img list: ', imglist, '\n'
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg\'', 'gif\'')
                    try:
                        gifurl = re.sub(r'dailynews.*txt', '', url)
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.rfind('/')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        new_html = new_html.replace(img, newimg)
                # repeat with src quoted by double quotes, for text parsed from src txt
                imglist = re.findall('src="?.*?jpg"', new_html)
                for img in imglist:
                    #print 'Found img: ', img
                    gifimg = img.replace('jpg"', 'gif"')
                    try:
                        #print 'url', url
                        pos = url.rfind('/')
                        gifurl = url[:pos+1]
                        #print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
                        new_html = new_html.replace(img, gifimg)
                    except:
                        pos = img.find('"')
                        newimg = img[0:pos+1] + '_' + img[pos+1:]
                        #print 'Use hi-res img', newimg
                        new_html = new_html.replace(img, newimg)
        return new_html
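
    # Note on the __HiResImg__ branch above: for every jpg reference it first
    # probes the server for a .gif variant and, only on failure, falls back to
    # the '_'-prefixed hi-res jpg naming. A minimal sketch of one iteration
    # (the tag value and base URL are hypothetical; gifimg[5:len(gifimg)-1]
    # strips the leading src=" and the trailing quote):
    #
    #   img = 'src="news_01.jpg"'
    #   gifimg = img.replace('jpg"', 'gif"')
    #   try:
    #       br.open_novisit(base + gifimg[5:len(gifimg)-1])
    #       new_html = new_html.replace(img, gifimg)        # .gif exists, prefer it
    #   except:
    #       new_html = new_html.replace(img, 'src="_news_01.jpg"')  # assume hi-res '_' naming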
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
@@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
            del item['absmiddle']
        return soup

    def populate_article_metadata(self, article, soup, first):
        # thumbnails shouldn't be available if using hi-res images
        if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
            img = soup.find('img')
            if img is not None:
                self.add_toc_thumbnail(article, img['src'])

        try:
            if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
                # look for content
                articlebodies = soup.findAll('div', attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        if articlebody:
                            # the text may or may not be enclosed in <p></p> tags
                            paras = articlebody.findAll('p')
                            if not paras:
                                paras = articlebody
                            textFound = False
                            for p in paras:
                                if not textFound:
                                    summary_candidate = self.tag_to_string(p).strip()
                                    summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
                                    if len(summary_candidate) > 0:
                                        article.summary = article.text_summary = summary_candidate
                                        textFound = True
            else:
                # display a simple text
                #article.summary = article.text_summary = u'\u66f4\u591a......'
                # display word counts
                counts = 0
                articlebodies = soup.findAll('div', attrs={'id':'newscontent'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'newscontent01'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'class':'content'})
                if not articlebodies:
                    articlebodies = soup.findAll('div', attrs={'id':'font'})
                if articlebodies:
                    for articlebody in articlebodies:
                        # the text may or may not be enclosed in <p></p> tags
                        paras = articlebody.findAll('p')
                        if not paras:
                            paras = articlebody
                        for p in paras:
                            summary_candidate = self.tag_to_string(p).strip()
                            counts += len(summary_candidate)
                    article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
        except:
            self.log("Error creating article descriptions")
            return

    # override from the one in version 0.8.31
    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            if __Region__ == 'Hong Kong':
                title = u'\u660e\u5831 (\u9999\u6e2f)'
            elif __Region__ == 'Vancouver':
                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
            elif __Region__ == 'Toronto':
                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
        else:
            title = self.short_title()
        # if not generating a periodical, force the date to appear in the title
        if __MakePeriodical__ == False:
            title = self.short_title()
        # change 1: allow our own flag to tell if a periodical is to be generated
        # also use the custom date instead of the current time
        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)
        # end of change 1
        # change 2: __appname__ replaced by newspaper publisher
        __appname__ = self.publisher
        mi = MetaInformation(title, [__appname__])
        mi.publisher = __appname__
        mi.author_sort = __appname__
        # change 3: use the __MakePeriodical__ flag to tell if a periodical should be generated
        if __MakePeriodical__ == True:
            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        else:
            mi.publication_type = self.publication_type+':'+self.short_title()
        #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
        # change 4: in the following, all the nowf() calls are changed to the adjusted time
        # This one doesn't matter
        mi.timestamp = nowf()
        # change 5: skip listing the articles
        #article_titles, aseen = [], set()
        #for f in feeds:
        #    for a in f:
        #        if a.title and a.title not in aseen:
        #            aseen.add(a.title)
        #            article_titles.append(force_unicode(a.title, 'utf-8'))

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))
        #mi.comments = self.description
        #if not isinstance(mi.comments, unicode):
        #    mi.comments = mi.comments.decode('utf-8', 'replace')
        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
        #    '\n\n'.join(article_titles))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)
        language = canonicalize_lang(self.language)
        if language is not None:
            mi.language = language
        # This one affects the pub date shown in the kindle title
        #mi.pubdate = nowf()
        # now appears to need the time field to be > 12.00 noon as well
        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'

        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
@@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                    desc = None
                else:
                    desc = self.description_limiter(desc)
                tt = a.toc_thumbnail if a.toc_thumbnail else None
                entries.append('%sindex.html'%adir)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                        play_order=po, author=auth, description=desc)
                parent.add_item('%sindex.html'%adir, None,
                        a.title if a.title else _('Untitled Article'),
                        play_order=po, author=auth,
                        description=desc, toc_thumbnail=tt)
                last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                for sp in a.sub_pages:
                    prefix = os.path.commonprefix([opf_path, sp])
@@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                    prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                    templ = self.navbar.generate(True, num, j, len(f),
                                    not self.has_single_feed,
                                    a.orig_url, self.publisher, prefix=prefix,
                                    a.orig_url, __appname__, prefix=prefix,
                                    center=self.center_navbar)
                    elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                    body.insert(len(body.contents), elem)
@@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
                        f.title, play_order=po, description=desc, author=auth))

        else:
            entries.append('feed_%d/index.html'%0)
@@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
recipes/mlody_technik_pl.recipe (new file, 15 lines)
@@ -0,0 +1,15 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe
class Mlody_technik(BasicNewsRecipe):
    title = u'Mlody technik'
    __author__ = 'fenuks'
    description = u'Młody technik'
    category = 'science'
    language = 'pl'
    cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    #keep_only_tags = [dict(id='container')]
    feeds = [(u'Artyku\u0142y', u'http://www.mt.com.pl/feed')]
@@ -1,9 +1,7 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
moneynews.newsmax.com
www.moneynews.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
@@ -12,40 +10,40 @@ class MoneyNews(BasicNewsRecipe):
    title = 'Moneynews.com'
    __author__ = 'Darko Miletic'
    description = 'Financial news worldwide'
    publisher = 'moneynews.com'
    language = 'en'

    publisher = 'Newsmax.com'
    language = 'en'
    category = 'news, finances, USA, business'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
        , '--ignore-tables'
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    encoding = 'utf8'
    extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : language
        , 'linearize_tables' : True
    }

    feeds = [
        (u'Street Talk', u'http://moneynews.newsmax.com/xml/streettalk.xml')
        ,(u'Finance News', u'http://moneynews.newsmax.com/xml/FinanceNews.xml')
        ,(u'Economy', u'http://moneynews.newsmax.com/xml/economy.xml')
        ,(u'Companies', u'http://moneynews.newsmax.com/xml/companies.xml')
        ,(u'Markets', u'http://moneynews.newsmax.com/xml/Markets.xml')
        ,(u'Investing & Analysis', u'http://moneynews.newsmax.com/xml/investing.xml')
        (u'Street Talk', u'http://www.moneynews.com/rss/StreetTalk/8.xml')
        ,(u'Finance News', u'http://www.moneynews.com/rss/FinanceNews/4.xml')
        ,(u'Economy', u'http://www.moneynews.com/rss/Economy/2.xml')
        ,(u'Companies', u'http://www.moneynews.com/rss/Companies/6.xml')
        ,(u'Markets', u'http://www.moneynews.com/rss/Markets/7.xml')
        ,(u'Investing & Analysis', u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml')
    ]


    keep_only_tags = [dict(name='table', attrs={'class':'copy'})]

    keep_only_tags = [dict(name='div', attrs={'class':'copy'})]

    remove_tags = [
        dict(name='td', attrs={'id':'article_fontsize'})
        ,dict(name='table', attrs={'id':'toolbox'})
        ,dict(name='tr', attrs={'id':'noprint3'})
        dict(attrs={'class':['MsoNormal', 'MsoNoSpacing']}),
        dict(name=['object','link','embed','form','meta'])
    ]


    def print_version(self, url):
        nodeid = url.rpartition('/')[2]
        return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid
@@ -7,6 +7,7 @@ class naczytniki(BasicNewsRecipe):
    language = 'pl'
    description = 'everything about e-readers'
    category = 'readers'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_tags_after = dict(name='div', attrs={'class':'sociable'})
recipes/nol.recipe (new file, 54 lines)
@@ -0,0 +1,54 @@
################################################################################
#Description: http://nol.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.18. - V1.1
################################################################################

from calibre.web.feeds.recipes import BasicNewsRecipe

class NOL(BasicNewsRecipe):
    title = u'NOL'
    __author__ = 'Bigpapa'
    oldest_article = 5
    max_articles_per_feed = 5  # maximum number of articles per feed kept in the generated e-book
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    language = 'hu'
    publication_type = 'newsportal'

    conversion_options = {
        'linearize_tables' : True,
    }

    keep_only_tags = [
        dict(name='table', attrs={'class':['article-box']})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['h','ad-container-outer','tags noborder','ad-container-inner','image-container-lead','tags','related-container']}),
        dict(name='h4'),
        dict(name='tfoot'),
        dict(name='td', attrs={'class':['foot']}),
        dict(name='span', attrs={'class':['image-container-caption']}),
    ]

    feeds = [
        # (u'V\xe1logat\xe1s', 'http://nol.hu/feed/valogatas.rss'),
        (u'Belf\xf6ld', 'http://nol.hu/feed/belfold.rss'),
        (u'K\xfclf\xf6ld', 'http://nol.hu/feed/kulfold.rss'),
        (u'Gazdas\xe1g', 'http://nol.hu/feed/gazdasag.rss'),
        (u'V\xe9lem\xe9ny', 'http://nol.hu/feed/velemeny.rss'),
        (u'Kult\xfara', 'http://nol.hu/feed/kult.rss'),
        (u'Tud/Tech', 'http://nol.hu/feed/tud-tech.rss'),
        (u'Sport', 'http://nol.hu/feed/sport.rss'),
        (u'Noller', 'http://nol.hu/feed/noller.rss'),
        (u'Mozaik', 'http://nol.hu/feed/mozaik.rss'),
        (u'Utaz\xe1s', 'http://nol.hu/feed/utazas.rss'),
        (u'Aut\xf3', 'http://nol.hu/feed/auto.rss'),
        (u'Voks', 'http://nol.hu/feed/voks.rss'),
    ]
@@ -1,20 +1,21 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe

class Nowa_Fantastyka(BasicNewsRecipe):
    title = u'Nowa Fantastyka'
    oldest_article = 7
    __author__ = 'fenuks'
    language = 'pl'
    encoding = 'latin2'
    description = 'site for fantasy readers'
    category = 'fantasy'
    max_articles_per_feed = 100
    INDEX = 'http://www.fantastyka.pl/'
    no_stylesheets = True
    needs_subscription = 'optional'
    remove_tags_before = dict(attrs={'class':'belka1-tlo-md'})
    #remove_tags_after = dict(name='span', attrs={'class':'naglowek-oceny'})
    remove_tags_after = dict(name='td', attrs={'class':'belka1-bot'})
    remove_tags = [dict(attrs={'class':'avatar2'})]
    feeds = []
    remove_tags = [dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}),
                   dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}),
                   dict(name='b', text='Dodaj komentarz'), dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]

    def find_articles(self, url):
        articles = []
@@ -45,3 +46,13 @@ class Nowa_Fantastyka(BasicNewsRecipe):
        cover = soup.find(name='img', attrs={'class':'okladka'})
        self.cover_url = self.INDEX + cover['src']
        return getattr(self, 'cover_url', self.cover_url)

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.fantastyka.pl/')
            br.select_form(nr=0)
            br['login'] = self.username
            br['pass'] = self.password
            br.submit()
        return br
recipes/prospectmaguk.recipe (new file, 79 lines)
@@ -0,0 +1,79 @@
#!/usr/bin/env python
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

__license__ = 'GPL v3'

'''
calibre recipe for prospectmagazine.co.uk (subscription)
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class ProspectMagUK(BasicNewsRecipe):
    title = u'Prospect Magazine'
    description = 'A general-interest publication offering analysis and commentary about politics, news and business.'
    __author__ = 'barty, duluoz'
    timefmt = ' [%d %B %Y]'
    no_stylesheets = True
    publication_type = 'magazine'
    masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
    category = 'news, UK'
    language = 'en_GB'
    max_articles_per_feed = 100
    auto_cleanup = True
    needs_subscription = True

    auto_cleanup_keep = '//div[@class="lead_image"]'
    remove_tags = [{'class':['shareinpost','postutils','postinfo']}]

    INDEX = 'http://www.prospectmagazine.co.uk/current-issue'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://www.prospectmagazine.co.uk/wp-login.php')
            br.select_form(name='loginform')
            br['log'] = self.username
            br['pwd'] = self.password
            br.submit()
        return br

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)
        #div = soup.find('h1', text=re.compile(r'Issue \d+'))
        #fname = self.tag_to_string(div) if div is not None else 'Current Issue'
        div = soup.find('div', id='cover_image')
        if div is not None:
            img = div.find('img', src=True)
            if img is not None:
                src = img['src']
                if src.startswith('/'):
                    src = 'http://www.prospectmagazine.co.uk' + src
                self.cover_url = src
        feeds = []
        # loop through sections
        for sect in soup.findAll('div', attrs={'class':'sectionheading'}):
            fname = self.tag_to_string(sect).replace('>', '').strip()
            self.log('Found section', fname)
            articles = []

            # note: can't just find siblings with class='post' because that will also
            # grab all the articles belonging to the sections that follow.
            for item in sect.findNextSiblings('div', attrs={'class':True}):
                if not 'post' in item['class']: break
                a = item.find('a', href=True)
                if a is None: continue
                url = a['href']
                title = self.tag_to_string(a)
                p = item.find('p')
                desc = self.tag_to_string(p) if p is not None else ''
                art = {'title':title, 'description':desc, 'date':' ', 'url':url}
                p = item.find(attrs={'class':re.compile('author')})
                self.log('\tFound article:', title, '::', url)
                if p is not None:
                    art['author'] = self.tag_to_string(p).strip()
                articles.append(art)

            feeds.append((fname, articles))
        return feeds
recipes/salonica_press_news.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class spn(BasicNewsRecipe):
    title = u'Salonica Press News'
    __author__ = "SteliosGero"
    oldest_article = 3
    max_articles_per_feed = 100
    auto_cleanup = True
    category = 'news, GR'
    language = 'el'

    feeds = [
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&type=rss'),
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&type=rss'),
        (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&type=rss'),
        (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&type=rss'),
        (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&type=rss'),
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&type=rss'),
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&type=rss'),
        (u'Media', u'http://www.spnews.gr/media-news?format=feed&type=rss'),
        (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&type=rss'),
        (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&type=rss'),
        (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&type=rss'),
        (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&type=rss'),
        (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&type=rss'),
        (u'Alert!', u'http://spnews.gr/alert?format=feed&type=rss'),
    ]

    def print_version(self, url):
        return url + '?tmpl=component&print=1&layout=default&page='
@@ -8,8 +8,8 @@ class SpidersWeb(BasicNewsRecipe):
    cover_url = 'http://www.spidersweb.pl/wp-content/themes/spiderweb/img/Logo.jpg'
    category = 'IT, WEB'
    language = 'pl'
    no_stylesheets = True
    max_articles_per_feed = 100
    remove_tags_before = dict(name="h1", attrs={'class':'Title'})
    remove_tags_after = dict(name="div", attrs={'class':'Text'})
    remove_tags = [dict(name='div', attrs={'class':['Tags', 'CommentCount FloatL', 'Show FloatL']})]
    keep_only_tags = [dict(id='Post')]
    remove_tags = [dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']})]
    feeds = [(u'Wpisy', u'http://www.spidersweb.pl/feed')]
@@ -6,92 +6,49 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch sueddeutsche.de
'''
from calibre.web.feeds.news import BasicNewsRecipe


class Sueddeutsche(BasicNewsRecipe):

    title = u'sueddeutsche.de'
    description = 'News from Germany'
    __author__ = 'Oliver Niesner and Armin Geller'  #AGe 2011-12-16
    __author__ = 'Oliver Niesner and Armin Geller'  #Update AGe 2011-12-16
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    oldest_article = 7
    max_articles_per_feed = 50
    no_stylesheets = True
    language = 'de'
    auto_cleanup = True
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1236175.1323967473!/image/image.jpg_gen/derivatives/860x860/image.jpg'  # 2011-12-16 AGe
    # 2011-12-16 AGe
    # remove_tags = [ dict(name='link'), dict(name='iframe'),
    #                 dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
    #                                               "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
    #
    #                 dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
    #                                                  "pages closed","basebox right narrow","headslot galleried"]}),
    #
    #                 dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
    #                                                  "item","videoBigButton","articlefooter full-column",
    #                                                  "bildbanderolle full-column","footerCopy padleft5"]}),
    #
    #                 dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
    #                 dict(name='div', attrs={'style':["position:relative;"]}),
    #                 dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
    #                 dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
    #                 dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
    #                 dict(name='td', attrs={'class':["artikelDruckenRight"]}),
    #                 dict(name='p', text = "ANZEIGE")
    #               ]
    # remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]
    #
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
        a{font-family:Arial,Helvetica,sans-serif; font-style:italic;}
        .dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
        .artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
        body{font-family:Arial,Helvetica,sans-serif; }
        .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
    #
    auto_cleanup = True
    cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1237395.1324054345!/image/image.jpg_gen/derivatives/860x860/image.jpg'  # 2011-12-16 AGe
    feeds = [
        # (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'), # AGe 2011-11-13
        # (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'), #AGe 2011-12-16 deactivated
        # (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
        # (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
        # (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
        # (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
        # (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
        (u'Politik', u'http://www.sueddeutsche.de/app/service/rss/ressort/politik/rss.xml'),
        (u'Wirtschaft', u'http://www.sueddeutsche.de/app/service/rss/ressort/wirtschaft/rss.xml'),
        (u'Geld', u'http://www.sueddeutsche.de/app/service/rss/ressort/finanzen/rss.xml'),
        (u'Kultur', u'http://www.sueddeutsche.de/app/service/rss/ressort/kultur/rss.xml'),
        (u'Sport', u'http://www.sueddeutsche.de/app/service/rss/ressort/sport/rss.xml'),
        (u'Leben', u'http://www.sueddeutsche.de/app/service/rss/ressort/leben/rss.xml'),
        (u'Karriere', u'http://www.sueddeutsche.de/app/service/rss/ressort/karriere/rss.xml'),
        (u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
        (u'Bayern', u'http://www.sueddeutsche.de/app/service/rss/ressort/bayern/rss.xml'),
        (u'Medien', u'http://www.sueddeutsche.de/app/service/rss/ressort/medien/rss.xml'),
        (u'Digital', u'http://www.sueddeutsche.de/app/service/rss/ressort/computerwissen/rss.xml'),
        (u'Auto', u'http://www.sueddeutsche.de/app/service/rss/ressort/autoreise/rss.xml'),
        (u'Wissen', u'http://www.sueddeutsche.de/app/service/rss/ressort/wissen/rss.xml'),
        (u'Panorama', u'http://www.sueddeutsche.de/app/service/rss/ressort/panorama/rss.xml'),
        (u'Reise', u'http://www.sueddeutsche.de/app/service/rss/ressort/reise/rss.xml'),
        (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
        (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
        (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
        (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
        (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
        (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
        (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
        (u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
        (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
        (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
        (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
        (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
        (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
        (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
        (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
        (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
        (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only
        (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
        (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
        (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
    ]

    # def print_version(self, url):  #AGe 2011-12-16 deactivated
    #     main, sep, id = url.rpartition('/')  #AGe 2011-12-16 deactivated
    #     return main + '/2.220/' + id  #AGe 2011-12-16 deactivated
    # AGe 2011-12-16: the problem of handling redirections is solved by re-usable recipe code from kiklop74.
    # Feed is: http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss
    # Article download source is: http://sz.de/1.1237295 (Ski Alpin: Der Erfolg kommt, der Trainer geht)
    # Article source is: http://www.sueddeutsche.de/sport/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
    # Article printversion is: http://www.sueddeutsche.de/sport/2.220/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
    def print_version(self, url):
        n_url = self.browser.open_novisit(url).geturl()
        main, sep, id = n_url.rpartition('/')
        return main + '/2.220/' + id
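
    # The redirect handling above can be traced end to end with the same call
    # the recipe uses; a minimal sketch reusing the example URLs from the
    # comments (values are illustrative):
    #
    #   n_url = self.browser.open_novisit('http://sz.de/1.1237295').geturl()
    #   # n_url == 'http://www.sueddeutsche.de/sport/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295'
    #   main, sep, id = n_url.rpartition('/')
    #   main + '/2.220/' + id
    #   # == 'http://www.sueddeutsche.de/sport/2.220/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295'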
@@ -1,4 +1,4 @@
import re
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe

class TimesOfIndia(BasicNewsRecipe):
@@ -17,7 +17,9 @@ class TimesOfIndia(BasicNewsRecipe):
    ]
    remove_tags = [
        {'class':re.compile('tabsintbgshow|prvnxtbg')},
        {'id':['fbrecommend', 'relmaindiv']}
        {'id':['fbrecommend', 'relmaindiv', 'shretxt', 'fbrecos', 'twtdiv',
            'gpls', 'auim']},
        {'class':['twitter-share-button', 'cmtmn']},
    ]

    feeds = [
@@ -46,25 +48,27 @@ class TimesOfIndia(BasicNewsRecipe):
    ]

    def get_article_url(self, article):
        # Times of India sometimes serves an ad page instead of the article;
        # this code detects and circumvents that
        url = BasicNewsRecipe.get_article_url(self, article)
        if '/0Ltimesofindia' in url:
            url = url.partition('/0L')[-1]
            url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
                    '/').replace('0E', '-')
            url = 'http://' + url.rpartition('/')[0]
            match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
            if match is not None:
                num = match.group(1)
                num = re.sub(r'[^0-9]', '', num)
                return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
                        num)
            else:
                cms = re.search(r'/(\d+)\.cms', url)
                if cms is not None:
                    return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
                            cms.group(1))
        try:
            s = article.summary
            return urllib.unquote(
                    re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
        except:
            pass
        link = article.get('link', None)
        if link and link.split('/')[-1] == "story01.htm":
            link = link.split('/')[-2]
            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                        '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
            for k, v in encoding.iteritems():
                link = link.replace(k, v)
            return link

        return url
    def print_version(self, url):
        return url + '?prtpage=1'

    def preprocess_html(self, soup, *args):
        byl = soup.find(attrs={'class':'byline'})
        if byl is not None:
            for l in byl.findAll('label'):
                l.extract()
        return soup
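
    # The feedsportal-style decoding in get_article_url above maps two-character
    # escape codes back to URL characters. A minimal worked example with a
    # hypothetical story01.htm link:
    #
    #   link = '0Ltimesofindia0Bindiatimes0N0Carticleshow0C123450Bcms/story01.htm'
    #   link = link.split('/')[-2]
    #   encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
    #               '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
    #   for k, v in encoding.iteritems():
    #       link = link.replace(k, v)
    #   # link == 'http://timesofindia.indiatimes.com/articleshow/12345.cms'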
recipes/tuttojove.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian website on Juventus F.C. - v1.00 (17, December 2011)'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305984536(BasicNewsRecipe):
    title = u'tuttojuve'
    description = 'Juventus'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 1
    max_articles_per_feed = 100

    feeds = [
        (u'notizie', u'http://feeds.tuttojuve.com/rss/'),
        (u'da vinovo', u'http://feeds.tuttojuve.com/rss/?c=10'),
        (u'primo piano', u'http://feeds.tuttojuve.com/rss/?c=16'),
        (u'editoriale', u'http://feeds.tuttojuve.com/rss/?c=3'),
        (u'il punto', u'http://feeds.tuttojuve.com/rss/?c=8'),
        (u'pagelle', u'http://feeds.tuttojuve.com/rss/?c=9'),
        (u'avversario', u'http://feeds.tuttojuve.com/rss/?c=11'),
    ]

    def print_version(self, url):
        return self.browser.open_novisit(url).geturl()
@@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-11-26 08:48+0000\n"
"PO-Revision-Date: 2011-12-17 09:29+0000\n"
"Last-Translator: Jellby <Unknown>\n"
"Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-27 05:24+0000\n"
"X-Generator: Launchpad (build 14381)\n"
"X-Launchpad-Export-Date: 2011-12-18 04:37+0000\n"
"X-Generator: Launchpad (build 14525)\n"

#. name for aaa
msgid "Ghotuo"
@@ -4963,7 +4963,7 @@ msgstr "Catawba"

#. name for chd
msgid "Chontal; Highland Oaxaca"
msgstr ""
msgstr "Chontal oaxaqueño de las tierras altas"

#. name for che
msgid "Chechen"
@@ -4971,7 +4971,7 @@ msgstr "Checheno"

#. name for chf
msgid "Chontal; Tabasco"
msgstr ""
msgstr "Chontal de Tabasco"

#. name for chg
msgid "Chagatai"
@@ -4983,7 +4983,7 @@ msgstr "Chinook"

#. name for chj
msgid "Chinantec; Ojitlán"
msgstr ""
msgstr "Chinanteco de Ojitlán"

#. name for chk
msgid "Chuukese"
@@ -5011,7 +5011,7 @@ msgstr "Chipewyan"

#. name for chq
msgid "Chinantec; Quiotepec"
msgstr ""
msgstr "Chinanteco de Quiotepec"

#. name for chr
msgid "Cherokee"
@@ -5043,7 +5043,7 @@ msgstr "Cheyenne"

#. name for chz
msgid "Chinantec; Ozumacín"
msgstr ""
msgstr "Chinanteco de Ozumacín"

#. name for cia
msgid "Cia-Cia"
@@ -5051,7 +5051,7 @@ msgstr "Cia-cia"

#. name for cib
msgid "Gbe; Ci"
msgstr ""
msgstr "Cigbe"

#. name for cic
msgid "Chickasaw"
@@ -5215,7 +5215,7 @@ msgstr "Arameo moderno caldeo"

#. name for cle
msgid "Chinantec; Lealao"
msgstr ""
msgstr "Chinanteco de Lealao"

#. name for clh
msgid "Chilisso"
@@ -5239,7 +5239,7 @@ msgstr "Clallam"

#. name for clo
msgid "Chontal; Lowland Oaxaca"
msgstr ""
msgstr "Chontal oaxaqueño de las tierras bajas"

#. name for clu
msgid "Caluyanun"
@@ -5251,7 +5251,7 @@ msgstr "Chulym"

#. name for cly
msgid "Chatino; Eastern Highland"
msgstr ""
msgstr "Chatino Lachao-Yolotepec"

#. name for cma
msgid "Maa"
@@ -5327,7 +5327,7 @@ msgstr ""

#. name for cnl
msgid "Chinantec; Lalana"
msgstr ""
msgstr "Chinanteco de Lalana"

#. name for cno
msgid "Con"
@@ -5339,7 +5339,7 @@ msgstr "Asmat central"

#. name for cnt
msgid "Chinantec; Tepetotutla"
msgstr ""
msgstr "Chinanteco de Tepetotutla"

#. name for cnu
msgid "Chenoua"
@@ -5355,7 +5355,7 @@ msgstr "Córnico medio"

#. name for coa
msgid "Malay; Cocos Islands"
msgstr ""
msgstr "Malayo de las Islas Cocos"

#. name for cob
msgid "Chicomuceltec"
@@ -5391,7 +5391,7 @@ msgstr "Cochimi"

#. name for cok
msgid "Cora; Santa Teresa"
msgstr ""
msgstr "Cora de Santa Teresa"

#. name for col
msgid "Columbia-Wenatchi"
@@ -5455,7 +5455,7 @@ msgstr "Chocho"

#. name for cpa
msgid "Chinantec; Palantla"
msgstr ""
msgstr "Chinanteco de Palantla"

#. name for cpb
msgid "Ashéninka; Ucayali-Yurúa"
@@ -5599,7 +5599,7 @@ msgstr "Cruzeño"

#. name for csa
msgid "Chinantec; Chiltepec"
msgstr ""
msgstr "Chinanteco de Chiltepec"

#. name for csb
msgid "Kashubian"
@@ -5651,7 +5651,7 @@ msgstr "Lengua de signos colombiana"

#. name for cso
msgid "Chinantec; Sochiapan"
msgstr ""
msgstr "Chinanteco de Sochiapan"

#. name for csq
msgid "Croatia Sign Language"
@@ -5683,7 +5683,7 @@ msgstr "Coos"

#. name for cta
msgid "Chatino; Tataltepec"
msgstr ""
msgstr "Chatino de Tataltepec"

#. name for ctc
msgid "Chetco"
@@ -5695,7 +5695,7 @@ msgstr ""

#. name for cte
msgid "Chinantec; Tepinapa"
msgstr ""
msgstr "Chinanteco de Tepinapa"

#. name for ctg
msgid "Chittagonian"
@@ -5703,7 +5703,7 @@ msgstr "Chitagoniano"

#. name for ctl
msgid "Chinantec; Tlacoatzintepec"
msgstr ""
msgstr "Chinanteco de Tlacoatzintepec"

#. name for ctm
msgid "Chitimacha"
@@ -5719,7 +5719,7 @@ msgstr "Emberá-catío"

#. name for ctp
msgid "Chatino; Western Highland"
msgstr ""
msgstr "Chatino"

#. name for cts
msgid "Bicolano; Northern Catanduanes"
@@ -5735,7 +5735,7 @@ msgstr "Chol"

#. name for ctz
msgid "Chatino; Zacatepec"
msgstr ""
msgstr "Chatino de Zacatepec"

#. name for cua
msgid "Cua"
@@ -5747,7 +5747,7 @@ msgstr "Cubeo"

#. name for cuc
msgid "Chinantec; Usila"
msgstr ""
msgstr "Chinanteco de Usila"

#. name for cug
msgid "Cung"
@@ -5819,7 +5819,7 @@ msgstr "Chug"

#. name for cvn
msgid "Chinantec; Valle Nacional"
msgstr ""
msgstr "Chinanteco de Valle Nacional"

#. name for cwa
msgid "Kabwa"
@@ -5847,7 +5847,7 @@ msgstr "Kuwaataay"

#. name for cya
msgid "Chatino; Nopala"
msgstr ""
msgstr "Chatino de Nopala"

#. name for cyb
msgid "Cayubaba"
@@ -5871,7 +5871,7 @@ msgstr "Knaanic"

#. name for czn
msgid "Chatino; Zenzontepec"
msgstr ""
msgstr "Chatino de Zenzontepec"

#. name for czo
msgid "Chinese; Min Zhong"
@@ -10603,7 +10603,7 @@ msgstr "Javanés"

#. name for jax
msgid "Malay; Jambi"
msgstr ""
msgstr "Malayo de Jambi"

#. name for jay
msgid "Yan-nhangu"
@@ -14743,7 +14743,7 @@ msgstr "Lorung meridional"

#. name for lrt
msgid "Malay; Larantuka"
msgstr ""
msgstr "Malayo de Larantuka"

#. name for lrv
msgid "Larevat"
@@ -15099,7 +15099,7 @@ msgstr ""

#. name for max
msgid "Malay; North Moluccan"
msgstr ""
msgstr "Malayo de las Molucas septentrional"

#. name for maz
msgid "Mazahua; Central"
@@ -15127,7 +15127,7 @@ msgstr ""

#. name for mbf
msgid "Malay; Baba"
msgstr ""
msgstr "Malayo baba"

#. name for mbh
msgid "Mangseng"
@@ -15467,7 +15467,7 @@ msgstr "Mende (Sierra Leona)"

#. name for meo
msgid "Malay; Kedah"
msgstr ""
msgstr "Malayo de Kedah"

#. name for mep
msgid "Miriwung"
@@ -15511,7 +15511,7 @@ msgstr ""

#. name for mfa
msgid "Malay; Pattani"
msgstr ""
msgstr "Malayo de Pattani"

#. name for mfb
msgid "Bangka"
@@ -15571,7 +15571,7 @@ msgstr ""

#. name for mfp
msgid "Malay; Makassar"
msgstr ""
msgstr "Malayo de Macasar"

#. name for mfq
msgid "Moba"
@@ -16059,7 +16059,7 @@ msgstr ""

#. name for mkn
msgid "Malay; Kupang"
msgstr ""
msgstr "Malayo de Kupang"

#. name for mko
msgid "Mingang Doso"
@@ -16207,7 +16207,7 @@ msgstr ""

#. name for mlz
msgid "Malaynon"
msgstr ""
msgstr "Malaynón"

#. name for mma
msgid "Mama"
@@ -16623,7 +16623,7 @@ msgstr ""

#. name for mqg
msgid "Malay; Kota Bangun Kutai"
msgstr ""
msgstr "Malayo kutai de Kota Bangun"

#. name for mqh
msgid "Mixtec; Tlazoyaltepec"
@@ -16839,7 +16839,7 @@ msgstr "Malgache masikoro"

#. name for msi
msgid "Malay; Sabah"
msgstr ""
msgstr "Malayo de Sabah"

#. name for msj
msgid "Ma (Democratic Republic of Congo)"
@@ -22607,7 +22607,7 @@ msgstr ""

#. name for sci
msgid "Creole Malay; Sri Lankan"
msgstr ""
msgstr "Malayo criollo de Sri Lanka"

#. name for sck
msgid "Sadri"
@@ -27187,7 +27187,7 @@ msgstr ""

#. name for vkt
msgid "Malay; Tenggarong Kutai"
msgstr ""
msgstr "Malayo kutai de Tenggarong"

#. name for vku
msgid "Kurrama"
@@ -28395,7 +28395,7 @@ msgstr "Edomita"

#. name for xdy
msgid "Dayak; Malayic"
msgstr ""
msgstr "Dayak malayo"

#. name for xeb
msgid "Eblan"
@@ -28727,7 +28727,7 @@ msgstr "Lengua de signos malasia"

#. name for xmm
msgid "Malay; Manado"
msgstr ""
msgstr "Malayo de Manado"

#. name for xmn
msgid "Persian; Manichaean Middle"
@@ -233,7 +233,7 @@ class GetTranslations(Translations):  # {{{

         if self.modified_translations:
             subprocess.check_call(['bzr', 'commit', '-m',
-                'IGN:Updated translations', self.PATH])
+                'IGN:Updated translations'])
         else:
             print('No updated translations available')
@@ -527,7 +527,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252',
     ent = match.group(1)
     if ent in exceptions:
         return '&'+ent+';'
-    if ent == 'apos':
+    if ent in {'apos', 'squot'}: # squot is generated by some broken CMS software
         return check("'")
     if ent == 'hellips':
         ent = 'hellip'
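A note on the change above: &squot; is not a standard HTML entity, but some broken CMS software emits it where &apos; is meant. The following is a minimal, self-contained sketch (not calibre's actual code; entity_to_char and the sample string are made up for illustration) of how such a handler behaves when driven by re.sub:

    import re

    def entity_to_char(match):
        # Treat the non-standard &squot; the same as &apos;
        ent = match.group(1)
        if ent in {'apos', 'squot'}:
            return "'"
        return match.group(0)  # leave anything else untouched

    print(re.sub(r'&(\w+);', entity_to_char, 'it&squot;s here'))
    # prints: it's here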
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'

 __appname__   = u'calibre'
-numeric_version = (0, 8, 31)
+numeric_version = (0, 8, 32)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"
@@ -48,7 +48,8 @@ class ANDROID(USBMS):
                 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
                 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
                 0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
-                0x70c6 : [0x226]
+                0x70c6 : [0x226],
+                0x4316 : [0x216],
                 },
         # Freescale
         0x15a2 : {
@@ -87,6 +88,7 @@ class ANDROID(USBMS):
                 0x6877 : [0x0400],
                 0x689e : [0x0400],
                 0xdeed : [0x0222],
+                0x1234 : [0x0400],
                 },

         # Viewsonic/Vizio
@@ -170,13 +172,14 @@ class ANDROID(USBMS):
             'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
             'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
             'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
-            'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107']
+            'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
+            'GT-I9003_CARD', 'XT912']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
             '__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
             'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
-            'A1-07___C0541A4F']
+            'A1-07___C0541A4F', 'XT912']

     OSX_MAIN_MEM = 'Android Device Main Memory'
@@ -170,8 +170,8 @@ class ODYSSEY(N516):
     description = _('Communicate with the Cybook Odyssey eBook reader.')

     BCD = [0x316]
-    VENDOR_NAME = 'LINUX'
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
+    VENDOR_NAME = ['LINUX', 'BOOKEEN']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['FILE-STOR_GADGET', 'FLASH_DISK']

     FORMATS = ['epub', 'fb2', 'html', 'pdf', 'txt']

@@ -266,12 +266,14 @@ class PRST1(USBMS):
         collections = booklist.get_collections(collections_attributes)

         with closing(sqlite.connect(dbpath)) as connection:
-            self.update_device_books(connection, booklist, source_id, plugboard)
+            self.update_device_books(connection, booklist, source_id,
+                    plugboard, dbpath)
             self.update_device_collections(connection, booklist, collections, source_id)

         debug_print('PRST1: finished update_device_database')

-    def update_device_books(self, connection, booklist, source_id, plugboard):
+    def update_device_books(self, connection, booklist, source_id, plugboard,
+            dbpath):
         opts = self.settings()
         upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
         refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
@@ -284,12 +286,12 @@ class PRST1(USBMS):
             query = 'SELECT file_path, _id FROM books'
             cursor.execute(query)
         except DatabaseError:
-            raise DeviceError('The SONY database is corrupted. '
+            raise DeviceError(('The SONY database is corrupted. '
                     ' Delete the file %s on your reader and then disconnect '
                     ' reconnect it. If you are using an SD card, you '
                     ' should delete the file on the card as well. Note that '
-                    ' deleting this file may cause your reader to forget '
-                    ' any notes/highlights, etc.')
+                    ' deleting this file will cause your reader to forget '
+                    ' any notes/highlights, etc.')%dbpath)

         db_books = {}
         for i, row in enumerate(cursor):
@@ -276,11 +276,34 @@ OptionRecommendation(name='duplicate_links_in_toc',

 OptionRecommendation(name='chapter',
     recommended_value="//*[((name()='h1' or name()='h2') and "
-        r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
+        r"re:test(., '\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))', 'i')) or @class "
         "= 'chapter']", level=OptionRecommendation.LOW,
         help=_('An XPath expression to detect chapter titles. The default '
             'is to consider <h1> or <h2> tags that contain the words '
-            '"chapter","book","section" or "part" as chapter titles as '
+            '"chapter","book","section", "prologue", "epilogue", or "part" as chapter titles as '
             'well as any tags that have class="chapter". The expression '
             'used must evaluate to a list of elements. To disable chapter '
             'detection, use the expression "/". See the XPath Tutorial '
@@ -35,7 +35,7 @@ def initialize_container(path_to_container, opf_name='metadata.opf',
     '''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
     zf = ZipFile(path_to_container, 'w')
     zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
-    zf.writestr('META-INF/', '', 0700)
+    zf.writestr('META-INF/', '', 0755)
     zf.writestr('META-INF/container.xml', CONTAINER)
     for path, _, data in extra_entries:
         zf.writestr(path, data)
@@ -1136,7 +1136,8 @@ class BinaryRecord(object): # {{{
         self.raw = record.raw
         sig = self.raw[:4]
         name = '%06d'%idx
-        if sig in (b'FCIS', b'FLIS', b'SRCS', b'DATP'):
+        if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
+                b'FDST', b'AUDI', b'VIDE',}:
             name += '-' + sig.decode('ascii')
         elif sig == b'\xe9\x8e\r\n':
             name += '-' + 'EOF'
@@ -325,6 +325,7 @@ class MobiMLizer(object):
                 elem.text = None
                 elem.set('id', id_)
                 elem.tail = tail
+                elem.tag = XHTML('a')
             else:
                 return
         tag = barename(elem.tag)
@@ -502,6 +502,7 @@ class MobiReader(object):
         self.processed_html = self.processed_html.replace('> <', '>\n<')
         self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
         self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html)
+        self.processed_html = re.sub(r'<(/?)o:p', r'<\1p', self.processed_html)
         # Swap inline and block level elements, and order block level elements according to priority
         # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
         self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
@@ -974,7 +975,7 @@ class MobiReader(object):
             processed_records.append(i)
             data = self.sections[i][0]
             if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
-                    b'RESC', b'BOUN', b'FDST', b'DATP'}:
+                    b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
                 # A FLIS, FCIS, SRCS or EOF record, ignore
                 continue
             buf = cStringIO.StringIO(data)
@@ -942,7 +942,13 @@ class Manifest(object):
         if isinstance(data, etree._Element):
             ans = xml2str(data, pretty_print=self.oeb.pretty_print)
             if self.media_type in OEB_DOCS:
-                ans = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ans)
+                # Convert self closing div|span|a tags to normally closed
+                # ones, as they are interpreted incorrectly by some browser
+                # based renderers
+                ans = re.sub(
+                    # tag name followed by either a space or a /
+                    r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>',
+                    r'<\g<tag>\g<arg>></\g<tag>>', ans)
             return ans
         if isinstance(data, unicode):
             return data.encode('utf-8')
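The new pattern above adds a lookahead so that only a bare div, a or span tag name (immediately followed by whitespace or the closing slash) matches, instead of any tag starting with those letters. A quick standalone check of that behaviour (the sample markup is made up):

    import re

    pat = r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>'
    repl = r'<\g<tag>\g<arg>></\g<tag>>'
    sample = '<div class="x"/> <a href="#"/> <address/>'
    print(re.sub(pat, repl, sample))
    # prints: <div class="x"></div> <a href="#"></a> <address/>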
@@ -4,6 +4,7 @@
 ###
 Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
 Released under the GPLv3 License
+Based on code originally written by Peter Sorotkin (epubcfi.js)
 ###
 #
 log = (error) ->
@@ -159,6 +160,63 @@ class CanonicalFragmentIdentifier

         point = {}
         error = null
+        offset = null
+
+        if (r = cfi.match(/^:(\d+)/)) != null
+            # Character offset
+            offset = parseInt(r[1])
+            cfi = cfi.substr(r[0].length)
+
+        if (r = cfi.match(/^~(-?\d+(\.\d+)?)/)) != null
+            # Temporal offset
+            point.time = r[1] - 0 # Coerce to number
+            cfi = cfi.substr(r[0].length)
+
+        if (r = cfi.match(/^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)/)) != null
+            # Spatial offset
+            point.x = r[1] - 0 # Coerce to number
+            point.y = r[3] - 0 # Coerce to number
+            cfi = cfi.substr(r[0].length)
+
+        if( (r = cfi.match(/^\[([^\]]+)\]/)) != null )
+            assertion = r[1]
+            cfi = cfi.substr(r[0].length)
+            if (r = assertion.match(/;s=([ab])$/)) != null
+                if r.index > 0 and assertion[r.index - 1] != '^'
+                    assertion = assertion.substr(0, r.index)
+                    point.forward = (r[1] == 'a')
+                assertion = unescape_from_cfi(assertion)
+                # TODO: Handle text assertion
+
+        # Find the text node that contains the offset
+        node?.parentNode?.normalize()
+        if offset != null
+            while true
+                len = node.nodeValue.length
+                if offset < len or (not point.forward and offset == len)
+                    break
+                next = false
+                while true
+                    nn = node.nextSibling
+                    if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata
+                        next = nn
+                        break
+                if not next
+                    if offset > len
+                        error = "Offset out of range: #{ offset }"
+                        offset = len
+                    break
+                node = next
+                offset -= len
+            point.offset = offset
+
+        point.node = node
+        if error
+            point.error = error
+        else if cfi.length > 0
+            point.error = "Undecoded CFI: #{ cfi }"
+
+        log(point.error)

         point

@@ -192,7 +250,7 @@ class CanonicalFragmentIdentifier
         cdoc = cd
         cwin = cdoc.defaultView

-        target.normalize()
+        (if target.parentNode then target.parentNode else target).normalize()

         if name in ['audio', 'video']
             tail = "~" + fstr target.currentTime
@@ -214,6 +272,67 @@ class CanonicalFragmentIdentifier
         this.encode(doc, target, offset, tail)
     # }}}

+    point: (cfi, doc=window?.document) -> # {{{
+        r = this.decode(cfi, doc)
+        if not r
+            return null
+        node = r.node
+        ndoc = node.ownerDocument
+        if not ndoc
+            log("CFI node has no owner document: #{ cfi } #{ node }")
+            return null
+
+        nwin = ndoc.defaultView
+        x = null
+        y = null
+
+        if typeof(r.offset) == "number"
+            # Character offset
+            range = ndoc.createRange()
+            if r.forward
+                try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
+            else
+                try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
+            k = 0
+            a = null
+            rects = null
+            node_len = node.nodeValue.length
+            until rects or rects.length or k >= try_list.length
+                t = try_list[k++]
+                start_offset = r.offset + t.start
+                end_offset = r.offset + t.end
+                a = t.a
+                if start_offset < 0 or end_offset >= node_len
+                    continue
+                range.setStart(node, start_offset)
+                range.setEnd(node, end_offset)
+                rects = range.getClientRects()
+
+            if not rects or not rects.length
+                log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
+                return null
+
+            rect = rects[0]
+            x = (a*rect.left + (1-a)*rect.right)
+            y = (rect.top + rect.bottom)/2
+        else
+            x = node.offsetLeft - nwin.scrollX
+            y = node.offsetTop - nwin.scrollY
+            if typeof(r.x) == "number" and node.offsetWidth
+                x += (r.x*node.offsetWidth)/100
+                y += (r.y*node.offsetHeight)/100
+
+        until ndoc == doc
+            node = nwin.frameElement
+            ndoc = node.ownerDocument
+            nwin = ndoc.defaultView
+            x += node.offsetLeft - nwin.scrollX
+            y += node.offsetTop - nwin.scrollY
+
+        {x:x, y:y, node:r.node, time:r.time}
+
+    # }}}
+
 if window?
     window.cfi = new CanonicalFragmentIdentifier()
 else if process?
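The decode logic added above peels offsets off the tail of a CFI one prefix at a time: ":N" for a character offset, "~T" for a temporal offset into audio/video, and "@x,y" for a spatial offset. A rough Python rendition of just that tail parsing, as a sketch for illustration (parse_cfi_tail and the sample string are made up; the shipped implementation is the CoffeeScript above):

    import re

    def parse_cfi_tail(cfi):
        point = {}
        m = re.match(r'^:(\d+)', cfi)  # character offset
        if m:
            point['offset'] = int(m.group(1)); cfi = cfi[m.end():]
        m = re.match(r'^~(-?\d+(\.\d+)?)', cfi)  # temporal offset
        if m:
            point['time'] = float(m.group(1)); cfi = cfi[m.end():]
        m = re.match(r'^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)', cfi)  # spatial offset
        if m:
            point['x'], point['y'] = float(m.group(1)), float(m.group(3))
            cfi = cfi[m.end():]
        return point, cfi

    print(parse_cfi_tail(':12~3.5@50,30'))
    # prints: ({'offset': 12, 'time': 3.5, 'x': 50.0, 'y': 30.0}, '')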
@@ -2,9 +2,9 @@
 <html>
 <head>
     <title>Testing CFI functionality</title>
-    <script type="text/javascript" src="cfi.js"></script>
+    <script type="text/javascript" src="../cfi.coffee"></script>
     <script type="text/javascript" src="jquery.js"></script>
-    <script type="text/javascript" src="cfi-test.js"></script>
+    <script type="text/javascript" src="cfi-test.coffee"></script>
 </head>
 <body>
 <h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>
@@ -18,8 +18,8 @@ except ImportError:


 def run_devel_server():
-    os.chdir(os.path.dirname(__file__))
-    serve(['../cfi.coffee', 'cfi-test.coffee'])
+    os.chdir(os.path.dirname(os.path.abspath(__file__)))
+    serve()

 if __name__ == '__main__':
     run_devel_server()
@@ -17,6 +17,7 @@ from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations

 RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
 XHTML_NS = 'http://www.w3.org/1999/xhtml'
+XMLNS_NS = 'http://www.w3.org/2000/xmlns/'

 class NotHTML(Exception):

@@ -28,9 +29,7 @@ def barename(name):
     return name.rpartition('}')[-1]

 def namespace(name):
-    if '}' in name:
-        return name.split('}', 1)[0][1:]
-    return ''
+    return name.rpartition('}')[0][1:]

 def XHTML(name):
     return '{%s}%s' % (XHTML_NS, name)
@@ -60,26 +59,86 @@ def merge_multiple_html_heads_and_bodies(root, log=None):
         log.warn('Merging multiple <head> and <body> sections')
     return root

-def _html5_parse(data):
+def clone_element(elem, nsmap={}, in_context=True):
+    if in_context:
+        maker = elem.getroottree().getroot().makeelement
+    else:
+        maker = etree.Element
+    nelem = maker(elem.tag, attrib=elem.attrib,
+            nsmap=nsmap)
+    nelem.text, nelem.tail = elem.text, elem.tail
+    nelem.extend(elem)
+    return nelem
+
+def html5_parse(data):
     import html5lib
     data = html5lib.parse(data, treebuilder='lxml').getroot()
-    html_ns = [ns for ns, val in data.nsmap.iteritems() if (val == XHTML_NS and
-        ns is not None)]
-    if html_ns:
-        # html5lib causes the XHTML namespace to not
-        # be set as the default namespace
-        nsmap = dict(data.nsmap)
-        nsmap[None] = XHTML_NS
-        for x in html_ns:
-            nsmap.pop(x)
-        nroot = etree.Element(data.tag, nsmap=nsmap,
-            attrib=dict(data.attrib))
-        nroot.text = data.text
-        nroot.tail = data.tail
-        for child in data:
-            nroot.append(child)
-        data = nroot
-    return data
+    # Set lang correctly
+    xl = data.attrib.pop('xmlU0003Alang', None)
+    if xl is not None and 'lang' not in data.attrib:
+        data.attrib['lang'] = xl
+
+    # html5lib has the most inelegant handling of namespaces I have ever seen
+    # Try to reconstitute destroyed namespace info
+    xmlns_declaration = '{%s}'%XMLNS_NS
+    non_html5_namespaces = {}
+    seen_namespaces = set()
+    for elem in tuple(data.iter()):
+        elem.attrib.pop('xmlns', None)
+        namespaces = {}
+        for x in tuple(elem.attrib):
+            if x.startswith('xmlnsU') or x.startswith(xmlns_declaration):
+                # A namespace declaration
+                val = elem.attrib.pop(x)
+                if x.startswith('xmlnsU0003A'):
+                    prefix = x[11:]
+                    namespaces[prefix] = val
+
+        if namespaces:
+            # Some destroyed namespace declarations were found
+            p = elem.getparent()
+            if p is None:
+                # We handle the root node later
+                non_html5_namespaces = namespaces
+            else:
+                idx = p.index(elem)
+                p.remove(elem)
+                elem = clone_element(elem, nsmap=namespaces)
+                p.insert(idx, elem)
+
+        b = barename(elem.tag)
+        idx = b.find('U0003A')
+        if idx > -1:
+            prefix, tag = b[:idx], b[idx+6:]
+            ns = elem.nsmap.get(prefix, None)
+            if ns is None:
+                ns = non_html5_namespaces.get(prefix, None)
+            if ns is not None:
+                elem.tag = '{%s}%s'%(ns, tag)
+
+        for b in tuple(elem.attrib):
+            idx = b.find('U0003A')
+            if idx > -1:
+                prefix, tag = b[:idx], b[idx+6:]
+                ns = elem.nsmap.get(prefix, None)
+                if ns is None:
+                    ns = non_html5_namespaces.get(prefix, None)
+                if ns is not None:
+                    elem.attrib['{%s}%s'%(ns, tag)] = elem.attrib.pop(b)

+        seen_namespaces |= set(elem.nsmap.itervalues())
+
+    nsmap = dict(html5lib.constants.namespaces)
+    nsmap[None] = nsmap.pop('html')
+    non_html5_namespaces.update(nsmap)
+    nsmap = non_html5_namespaces
+
+    data = clone_element(data, nsmap=nsmap, in_context=False)
+
+    # Remove unused namespace declarations
+    fnsmap = {k:v for k,v in nsmap.iteritems() if v in seen_namespaces and v !=
+            XMLNS_NS}
+    return clone_element(data, nsmap=fnsmap, in_context=False)

 def _html4_parse(data, prefer_soup=False):
     if prefer_soup:
@@ -177,7 +236,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
     except etree.XMLSyntaxError:
         log.debug('Parsing %s as HTML' % filename)
         try:
-            data = _html5_parse(data)
+            data = html5_parse(data)
         except:
             log.exception(
                 'HTML 5 parsing failed, falling back to older parsers')
@@ -261,6 +320,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
             nroot.append(elem)
         data = nroot

+
     data = merge_multiple_html_heads_and_bodies(data, log)
     # Ensure has a <head/>
     head = xpath(data, '/h:html/h:head')
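For context on the U0003A strings in the code above: html5lib releases of this era mangled colons in foreign element and attribute names, so e.g. xml:lang came back as xmlU0003Alang, and the new html5_parse reverses that to reconstitute namespace info. A tiny standalone illustration of the un-mangling step ('svgU0003Apath' is a made-up sample name):

    b = 'svgU0003Apath'
    idx = b.find('U0003A')              # 'U0003A' stands in for ':'
    if idx > -1:
        prefix, tag = b[:idx], b[idx+6:]  # len('U0003A') == 6
        print(prefix, tag)
    # prints: svg path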
@@ -159,15 +159,18 @@ class FlatFilenames(object): # {{{
                 continue

             data = item.data
+            isp = item.spine_position
             nhref = oeb.manifest.generate(href=nhref)[1]
+            if isp is not None:
+                oeb.spine.remove(item)
+            oeb.manifest.remove(item)
+
             nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
                     fallback=item.fallback)
             self.rename_map[item.href] = nhref
             self.renamed_items_map[nhref] = item
-            if item.spine_position is not None:
-                oeb.spine.insert(item.spine_position, nitem, item.linear)
-                oeb.spine.remove(item)
-            oeb.manifest.remove(item)
+            if isp is not None:
+                oeb.spine.insert(isp, nitem, item.linear)

         if self.rename_map:
             self.log('Found non-flat filenames, renaming to support broken'
@@ -16,7 +16,7 @@ class UnsmartenPunctuation(object):

     def unsmarten(self, root):
         for x in self.html_tags(root):
-            if not barename(x) == 'pre':
+            if not barename(x.tag) == 'pre':
                 if getattr(x, 'text', None):
                     x.text = unsmarten_text(x.text)
                 if getattr(x, 'tail', None) and x.tail:
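The one-character fix above matters because barename() operates on a tag string, not on an element object; passing the element itself is what broke the unsmarten punctuation option in 0.8.31. A standalone sketch of the distinction (not calibre code, just the same barename helper applied to an lxml element):

    from lxml import etree

    def barename(name):
        # Strip a '{namespace}' prefix from a tag name
        return name.rpartition('}')[-1]

    x = etree.fromstring('<pre xmlns="http://www.w3.org/1999/xhtml"/>')
    print(barename(x.tag))  # prints: pre
    # barename(x) would fail: an element has no rpartition method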
@@ -273,11 +273,34 @@ def error_dialog(parent, title, msg, det_msg='', show=False,
     return d

 def question_dialog(parent, title, msg, det_msg='', show_copy_button=False,
-        default_yes=True):
+        default_yes=True,
+        # Skippable dialogs
+        # Set skip_dialog_name to a unique name for this dialog
+        # Set skip_dialog_msg to a message displayed to the user
+        skip_dialog_name=None, skip_dialog_msg=_('Show this confirmation again'),
+        skip_dialog_skipped_value=True, skip_dialog_skip_precheck=True):
     from calibre.gui2.dialogs.message_box import MessageBox

+    auto_skip = set(gprefs.get('questions_to_auto_skip', []))
+    if (skip_dialog_name is not None and skip_dialog_name in auto_skip):
+        return bool(skip_dialog_skipped_value)
+
     d = MessageBox(MessageBox.QUESTION, title, msg, det_msg, parent=parent,
         show_copy_button=show_copy_button, default_yes=default_yes)
-    return d.exec_() == d.Accepted
+
+    if skip_dialog_name is not None and skip_dialog_msg:
+        tc = d.toggle_checkbox
+        tc.setVisible(True)
+        tc.setText(skip_dialog_msg)
+        tc.setChecked(bool(skip_dialog_skip_precheck))
+
+    ret = d.exec_() == d.Accepted
+
+    if skip_dialog_name is not None and not d.toggle_checkbox.isChecked():
+        auto_skip.add(skip_dialog_name)
+        gprefs.set('questions_to_auto_skip', list(auto_skip))
+
+    return ret

 def info_dialog(parent, title, msg, det_msg='', show=False,
         show_copy_button=True):
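A standalone sketch (not calibre code; prefs and ask_user are stand-ins for gprefs and the Qt dialog) of the skippable-confirmation pattern that question_dialog gains above: remember dialogs the user opted out of, and short-circuit with the configured skipped value on later calls:

    prefs = {'questions_to_auto_skip': []}   # stand-in for calibre's gprefs

    def question(name, ask_user, skipped_value=True):
        auto_skip = set(prefs['questions_to_auto_skip'])
        if name is not None and name in auto_skip:
            return skipped_value             # user opted out earlier
        answer, show_again = ask_user()      # e.g. a dialog with a checkbox
        if name is not None and not show_again:
            auto_skip.add(name)
            prefs['questions_to_auto_skip'] = list(auto_skip)
        return answer

    print(question('demo', lambda: (True, False)))   # asks; user opts out
    print(question('demo', lambda: (False, True)))   # skipped -> True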
@@ -683,7 +683,7 @@ class DeviceMixin(object): # {{{
         return self.ask_a_yes_no_question(
                 _('No suitable formats'), msg,
                 ans_when_user_unavailable=True,
-                det_msg=autos
+                det_msg=autos, skip_dialog_name='auto_convert_before_send'
                 )

     def set_default_thumbnail(self, height):
@@ -12,7 +12,7 @@ from PyQt4.Qt import QDialog, QApplication

 from calibre.gui2.dialogs.add_from_isbn_ui import Ui_Dialog
 from calibre.ebooks.metadata import check_isbn
 from calibre.constants import iswindows
-from calibre.gui2 import gprefs
+from calibre.gui2 import gprefs, question_dialog, error_dialog

 class AddFromISBN(QDialog, Ui_Dialog):

@@ -44,6 +44,7 @@ class AddFromISBN(QDialog, Ui_Dialog):
         tags = list(filter(None, [x.strip() for x in tags]))
         gprefs['add from ISBN tags'] = tags
         self.set_tags = tags
+        bad = set()
         for line in unicode(self.isbn_box.toPlainText()).strip().splitlines():
             line = line.strip()
             if not line:
@@ -64,5 +65,19 @@ class AddFromISBN(QDialog, Ui_Dialog):
                         os.access(parts[1], os.R_OK) and os.path.isfile(parts[1]):
                     book['path'] = parts[1]
                 self.books.append(book)
+            else:
+                bad.add(parts[0])
+        if bad:
+            if self.books:
+                if not question_dialog(self, _('Some invalid ISBNs'),
+                        _('Some of the ISBNs you entered were invalid. They will'
+                            ' be ignored. Click Show Details to see which ones.'
+                            ' Do you want to proceed?'), det_msg='\n'.join(bad),
+                        show_copy_button=True):
+                    return
+            else:
+                return error_dialog(self, _('All invalid ISBNs'),
+                    _('All the ISBNs you entered were invalid. No books'
+                        ' can be added.'), show=True)
         QDialog.accept(self, *args)
@@ -44,6 +44,7 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
         self.msg.setText(msg)
         self.det_msg.setPlainText(det_msg)
         self.det_msg.setVisible(False)
+        self.toggle_checkbox.setVisible(False)

         if show_copy_button:
             self.ctc_button = self.bb.addButton(_('&Copy to clipboard'),
@@ -53,7 +53,7 @@
      </property>
     </widget>
    </item>
-   <item row="2" column="0" colspan="2">
+   <item row="3" column="0" colspan="2">
     <widget class="QDialogButtonBox" name="bb">
      <property name="orientation">
       <enum>Qt::Horizontal</enum>
@@ -63,6 +63,13 @@
      </property>
     </widget>
    </item>
+   <item row="2" column="0" colspan="2">
+    <widget class="QCheckBox" name="toggle_checkbox">
+     <property name="text">
+      <string/>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
  <resources>
@@ -143,12 +143,12 @@ class GuiRunner(QObject):
         add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
         sys.excepthook = main.unhandled_exception
         if len(self.args) > 1:
-            p = os.path.abspath(self.args[1])
-            if os.path.isdir(p):
-                prints('Ignoring directory passed as command line argument:',
-                        self.args[1])
-            else:
-                add_filesystem_book(p)
+            files = [os.path.abspath(p) for p in self.args[1:] if not
+                    os.path.isdir(p)]
+            if len(files) < len(sys.argv[1:]):
+                prints('Ignoring directories passed as command line arguments')
+            if files:
+                add_filesystem_book(files)
         self.app.file_event_hook = add_filesystem_book
         self.main = main
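The rewrite above is what enables the new "multiple filenames on the command line" feature from this release: instead of looking only at the first argument, every non-directory argument is collected and passed to add_filesystem_book in one list. A minimal sketch of just the filtering step (the sample paths are made up):

    import os

    args = ['/tmp/a.epub', '/tmp', '/tmp/b.mobi']   # stand-in for self.args[1:]
    files = [os.path.abspath(p) for p in args if not os.path.isdir(p)]
    if len(files) < len(args):
        print('Ignoring directories passed as command line arguments')
    print(files)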
@@ -162,6 +162,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         for key in dynamic.keys():
             if key.endswith('_again') and dynamic[key] is False:
                 dynamic[key] = True
+        gprefs['questions_to_auto_skip'] = []
         info_dialog(self, _('Done'),
                 _('Confirmation dialogs have all been reset'), show=True)
@@ -260,11 +260,11 @@ class ShortcutConfig(QWidget):
         self.view.setModel(model)
         self.delegate = Delegate()
         self.view.setItemDelegate(self.delegate)
-        self.delegate.sizeHintChanged.connect(self.scrollTo)
+        self.delegate.sizeHintChanged.connect(self.scrollTo,
+                type=Qt.QueuedConnection)

     def scrollTo(self, index):
-        self.view.scrollTo(index)
+        self.view.scrollTo(index, self.view.EnsureVisible)

     @property
     def is_editing(self):
@@ -54,12 +54,12 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
             if not id:
                 continue

-            cover_url = ''.join(data.xpath('.//img/@src'))
+            cover_url = ''.join(data.xpath('.//div[@class="info"]/h3/a/@id'))
             title = ''.join(data.xpath('.//div[@class="info"]/h3/a/@title'))
-            formats = title.split()
-            formats = formats[-1]
+            formats = ''.join(data.xpath('.//div[@class="info"]/p[1]/text()'))
+            formats = re.findall(r'\((.*?)\)',formats)[0]
             author = ''.join(data.xpath('.//div[@class="info"]/h4/text() | .//div[@class="info"]/h4/span/text()'))
-            price = ''.join(data.xpath('.//h3[@class="promocja"]/text()'))
+            price = ''.join(data.xpath('.//div[@class="options"]/h3/text()'))
             price = re.sub('PLN', 'zł', price)
             price = re.sub('\.', ',', price)
             drm = data.xpath('boolean(.//div[@class="info" and contains(., "Zabezpieczenie: DRM")])')
@@ -67,7 +67,7 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
                 counter -= 1

                 s = SearchResult()
-                s.cover_url = cover_url
+                s.cover_url = 'http://imguser.gandalf.com.pl/' + re.sub('p', 'p_', cover_url) + '.jpg'
                 s.title = title.strip()
                 s.author = author.strip()
                 s.price = price
@@ -407,11 +407,14 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
         return getattr(self, '__systray_minimized', False)

     def ask_a_yes_no_question(self, title, msg, det_msg='',
-            show_copy_button=False, ans_when_user_unavailable=True):
+            show_copy_button=False, ans_when_user_unavailable=True,
+            skip_dialog_name=None, skipped_value=True):
         if self.is_minimized_to_tray:
             return ans_when_user_unavailable
         return question_dialog(self, title, msg, det_msg=det_msg,
-                show_copy_button=show_copy_button)
+                show_copy_button=show_copy_button,
+                skip_dialog_name=skip_dialog_name,
+                skip_dialog_skipped_value=skipped_value)

     def hide_windows(self):
         for window in QApplication.topLevelWidgets():
@@ -269,7 +269,7 @@ How do I use |app| with my Android phone/tablet?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 There are two ways that you can connect your Android device to calibre. Using a USB cable-- or wirelessly, over the air.
-**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**.
+**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**. Some devices may have a setting to put them in "disk mode" or "USB Transfer mode" that is needed before they act as USB disks.

 Using a USB cable
 ^^^^^^^^^^^^^^^^^^^^
@@ -4,9 +4,9 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: calibre 0.8.31\n"
-"POT-Creation-Date: 2011-12-16 09:38+IST\n"
-"PO-Revision-Date: 2011-12-16 09:38+IST\n"
+"Project-Id-Version: calibre 0.8.32\n"
+"POT-Creation-Date: 2011-12-23 08:40+IST\n"
+"PO-Revision-Date: 2011-12-23 08:40+IST\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@@ -24,8 +24,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/db/cache.py:105
 #: /home/kovid/work/calibre/src/calibre/db/cache.py:108
 #: /home/kovid/work/calibre/src/calibre/db/cache.py:119
-#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:282
-#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:283
+#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:285
+#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:286
 #: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:99
 #: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:100
 #: /home/kovid/work/calibre/src/calibre/devices/jetbook/driver.py:74
@@ -36,8 +36,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:71
 #: /home/kovid/work/calibre/src/calibre/devices/prs500/books.py:267
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:660
-#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:328
-#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:329
+#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:330
+#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:331
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/driver.py:485
 #: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:106
 #: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:109
@@ -97,17 +97,17 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:85
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:128
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:169
-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:748
-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1005
-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1007
-#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1009
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:749
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1006
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1008
+#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1010
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/utils.py:299
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/writer2/indexer.py:497
 #: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:145
 #: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:147
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:818
-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:273
-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:277
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:333
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:337
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:142
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:149
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:66
@@ -137,8 +137,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/ebooks/pdf/writer.py:103
 #: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:320
 #: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:322
-#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:387
-#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:395
+#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:410
+#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:418
 #: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:157
 #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:378
 #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:381
@@ -889,15 +889,15 @@ msgstr ""
 msgid "Communicate with Android phones."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:152
+#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:154
 msgid "Comma separated list of directories to send e-books to on the device. The first one that exists will be used"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:222
+#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:225
 msgid "Communicate with S60 phones."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:241
+#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:244
 msgid "Communicate with WebOS tablets."
 msgstr ""
@@ -993,8 +993,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:102
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:447
 #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:470
-#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:547
-#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:566
+#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:549
+#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:568
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1052
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1058
 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1093
@@ -2036,7 +2036,7 @@ msgid "When creating a TOC from links in the input document, allow duplicate ent
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:281
-msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\" or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature."
+msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\", \"prologue\", \"epilogue\", or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature."
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:295
@@ -2934,7 +2934,7 @@ msgid ""
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1417
-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1238
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:943
 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:41
 msgid "Cover"
@@ -3085,70 +3085,70 @@ msgstr ""
 msgid "No details available"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1239
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245
 msgid "Title Page"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1240
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246
 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/htmltoc.py:15
 #: /home/kovid/work/calibre/src/calibre/gui2/viewer/main.py:56
 #: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:199
 msgid "Table of Contents"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1241
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247
 msgid "Index"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1242
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248
 msgid "Glossary"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1243
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249
 msgid "Acknowledgements"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250
 msgid "Bibliography"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251
 msgid "Colophon"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252
 msgid "Copyright"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253
 msgid "Dedication"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254
 msgid "Epigraph"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1255
 msgid "Foreword"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1256
 msgid "List of Illustrations"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1257
 msgid "List of Tables"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1258
 msgid "Notes"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1259
 msgid "Preface"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254
+#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
 msgid "Main Text"
 msgstr ""
@@ -3681,7 +3681,11 @@ msgstr ""
 msgid "tag browser categories not to display"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:506
+#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:280
+msgid "Show this confirmation again"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:529
 msgid "Choose Files"
 msgstr ""
@@ -4186,7 +4190,7 @@ msgid "Create a catalog of the books in your calibre library"
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/gui2/actions/convert.py:88
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:591
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:594
 msgid "Cannot convert"
 msgstr ""
@@ -6643,7 +6647,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/gui2/convert/single_ui.py:117
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info_ui.py:69
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf_ui.py:96
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:52
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:21
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/progress_ui.py:53
 #: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:61
 #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_progress_dialog_ui.py:50
@@ -7279,6 +7283,22 @@ msgstr ""
 msgid "Reset author to Unknown"
 msgstr ""

+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:72
+msgid "Some invalid ISBNs"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:73
+msgid "Some of the ISBNs you entered were invalid. They will be ignored. Click Show Details to see which ones. Do you want to proceed?"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:79
+msgid "All invalid ISBNs"
+msgstr ""
+
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:80
+msgid "All the ISBNs you entered were invalid. No books can be added."
+msgstr ""
+
 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn_ui.py:63
 msgid "Add books by ISBN"
 msgstr ""
@@ -7920,35 +7940,35 @@ msgstr ""
 msgid "&Hide all jobs"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:49
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:50
 msgid "&Copy to clipboard"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:53
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
 msgid "Show &details"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:55
 msgid "Hide &details"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:58
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:59
 msgid "Show detailed information about this error"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:100
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:101
 #: /home/kovid/work/calibre/src/calibre/gui2/wizard/__init__.py:552
 msgid "Copied"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:138
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:139
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:796
 #: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:205
 msgid "Copy to clipboard"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:184
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:232
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:185
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:233
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:860
 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:961
 msgid "View log"
@@ -10063,7 +10083,7 @@ msgid "None"
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/gui2/keyboard.py:389
-#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:165
+#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
 msgid "Done"
 msgstr ""
@@ -10404,7 +10424,7 @@ msgid "Failed to create calibre library at: %r."
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/gui2/main.py:108
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:170
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:169
 msgid "Choose a location for your new calibre e-book library"
 msgstr ""
@@ -10412,74 +10432,74 @@ msgstr ""
 msgid "Initializing user interface..."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:163
 msgid "Repairing failed"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:165
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
 msgid "The database repair failed. Starting with a new empty library."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:178
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:203
 msgid "Bad database location"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:180
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
 #, python-format
 msgid "Bad database location %r. calibre will now quit."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:494
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:191
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:497
 msgid "Corrupted database"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:193
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
 #, python-format
 msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful. If you say No, a new empty calibre library will be created."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:205
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
 #, python-format
 msgid "Bad database location %r. Will start with a new, empty calibre library"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:215
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:214
 #, python-format
 msgid "Starting %s: Loading books..."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:295
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:294
 msgid "If you are sure it is not running"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:298
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:297
 msgid "may be running in the system tray, in the"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:300
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:299
 msgid "upper right region of the screen."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:302
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:301
 msgid "lower right region of the screen."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:305
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:304
 msgid "try rebooting your computer."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:307
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:321
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:306
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:320
 msgid "try deleting the file"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:309
 msgid "Cannot Start "
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:311
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
 #, python-format
 msgid "%s is already running."
 msgstr ""
@@ -11097,7 +11117,7 @@ msgstr ""
 msgid "All on 1 tab"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
+#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:167
 msgid "Confirmation dialogs have all been reset"
 msgstr ""
@@ -12623,7 +12643,7 @@ msgid "Here you can control how calibre will save your books when you click the
 msgstr ""

 #: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:70
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:432
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:435
 msgid "Failed to start content server"
 msgstr ""
@@ -13791,39 +13811,39 @@ msgstr ""
 msgid "You have started calibre in debug mode. After you quit calibre, the debug log will be available in the file: %s<p>The log will be displayed automatically."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:495
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:498
 #, python-format
 msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful."
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:579
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:582
 msgid "Conversion Error"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:602
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:605
 msgid "Recipe Disabled"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:618
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:621
 msgid "<b>Failed</b>"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:652
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
 msgid "There are active jobs. Are you sure you want to quit?"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:658
 msgid ""
 " is communicating with the device!<br>\n"
 " Quitting may cause corruption on the device.<br>\n"
 " Are you sure you want to quit?"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:659
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:662
 msgid "Active jobs"
 msgstr ""

-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:727
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:730
 msgid "will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray."
 msgstr ""