Merge from trunk

Commit b7f2bb23df by Charles Haley, 2011-12-23 09:13:13 +01:00
147 changed files with 25886 additions and 21914 deletions

View File

@@ -19,6 +19,81 @@
 # new recipes:
 # - title:

+- version: 0.8.32
+  date: 2011-12-23
+
+  new features:
+    - title: "Linux: When deleting books, send them to the recycle bin, instead of permanently deleting. This is the same behavior as on Windows and OS X."
+
+    - title: "Add a checkbox to allow users to disable the popup that asks if books should be auto-converted before sending to device"
+
+    - title: "Drivers for Droid Razr, Samsung GT-I9003 and Bookeen Odyssey"
+      tickets: [906356, 906056, 905862]
+
+    - title: "Allow passing multiple filenames as command line arguments to calibre, to add multiple books."
+      tickets: [907968]
+
+  bug fixes:
+    - title: "MOBI Output: Fix regression in 0.8.30 that caused the use of hidden heading elements for the TOC to generate links in the wrong place."
+      tickets: [907156]
+
+    - title: "EPUB Output: Ensure directories have the correct permissions bits set when unzipping an epub with unzip on Unix"
+
+    - title: "Fix the bottom-most entries in the keyboard shortcuts list for the viewer not being editable"
+
+    - title: "EPUB Output: Fix handling of self closing <audio> tags."
+      tickets: [906521]
+
+    - title: "MOBI Input: Map invalid <o:p> tags to <p> tags before parsing, to handle broken nesting."
+      tickets: [905715]
+
+    - title: "Conversion pipeline: HTML5 parsing: Fix handling of XML namespaces. Fixes regression in 0.8.30 that caused some articles in some news downloads to appear blank when viewed in Adobe Digital Editions based readers"
+
+    - title: "Get Books: Gandalf store, fix price and cover detection"
+
+    - title: "EPUB Output: Fix the Flatten filenames option in EPUB Output causing duplicated manifest ids in rare cases."
+      tickets: [905692]
+
+    - title: "When adding books via ISBN, show the user the list of invalid ISBNs that will be ignored, if any, before starting the add operation."
+      tickets: [905690]
+
+    - title: "Fix the unsmarten punctuation conversion option, broken in 0.8.31."
+      tickets: [905596]
+
+    - title: "Fix broken evaluation of composite columns in save-to-disk"
+
+  improved recipes:
+    - Cosmopolitan UK
+    - Hindustan Times
+    - HVG
+    - moneynews.com
+    - Ming Pao
+    - Glasgow Herald
+    - Times of India
+    - Focus Magazine
+    - Hacker News
+    - Independent
+    - Sueddeutsche
+
+  new recipes:
+    - title: Prospect Magazine UK
+      author: Barty and duoloz
+
+    - title: Elet es Irodalom and NOL
+      author: Bigpapa
+
+    - title: Salonica Press News
+      author: SteliosGero
+
+    - title: Echo Online
+      author: Armin Geller
+
+    - title: Various Polish news sources
+      author: fenuks
+
+    - title: Various Italian news sources
+      author: faber1971
+
 - version: 0.8.31
   date: 2011-12-16

View File

@@ -1,19 +1,38 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class Adventure_zone(BasicNewsRecipe):
     title = u'Adventure Zone'
     __author__ = 'fenuks'
     description = 'Adventure zone - adventure games from A to Z'
     category = 'games'
     language = 'pl'
-    oldest_article = 15
-    max_articles_per_feed = 100
     no_stylesheets = True
+    oldest_article = 20
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
     remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
-    remove_tags_after = dict(name='td', attrs={'class':'main-body middle-border'})
+    remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
+    remove_tags_after = dict(id='comments')
     extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
     feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]

+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        soup = self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
+        tag = soup.find(name='channel')
+        titles = []
+        for r in tag.findAll(name='image'):
+            r.extract()
+        art = tag.findAll(name='item')
+        for i in art:
+            titles.append(i.title.string)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                article.title = titles[feed.articles.index(article)]
+        return feeds
+
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
         cover = soup.find(id='box_OstatninumerAZ')

@@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):
     def skip_ad_pages(self, soup):
-        skip_tag = soup.body.findAll(name='a')
-        if skip_tag is not None:
-            for r in skip_tag:
-                if 'articles.php?' in r['href']:
-                    if r.strong is not None:
-                        word = r.strong.string
-                        if ('zapowied' or 'recenzj') in word:
-                            return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
-                        else:
-                            None
-
-    def print_version(self, url):
-        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
+        skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
+        skip_tag = skip_tag.findAll(name='a')
+        for r in skip_tag:
+            if r.strong:
+                word = r.strong.string
+                if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
+                    return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
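The parse_feeds override above re-reads the raw RSS and rewrites each article title from the feed's own <item> entries, a useful pattern when the generic feed parser mangles titles. A minimal sketch of the same pattern, against a hypothetical example.com feed rather than Adventure Zone's:

from calibre.web.feeds.news import BasicNewsRecipe

class TitleFromRawFeed(BasicNewsRecipe):
    # Hypothetical sketch of the title-rewriting pattern; not part of this commit.
    title = 'Title From Raw Feed'
    feeds = [('News', 'http://example.com/feeds/news.xml')]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        # re-read the raw RSS and collect the <item> titles in document order
        soup = self.index_to_soup('http://example.com/feeds/news.xml')
        titles = [item.title.string for item in soup.findAll(name='item')]
        # overwrite each parsed article's title positionally
        for feed in feeds:
            for n, article in enumerate(feed.articles):
                if n < len(titles):
                    article.title = titles[n]
        return feeds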

View File

@@ -1,5 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-

 class AstroNEWS(BasicNewsRecipe):
     title = u'AstroNEWS'
     __author__ = 'fenuks'

@@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
     language = 'pl'
     oldest_article = 8
     max_articles_per_feed = 100
-    auto_cleanup = True
+    #extra_css= 'table {text-align: left;}'
+    no_stylesheets = True
     cover_url = 'http://news.astronet.pl/img/logo_news.jpg'
-    # no_stylesheets= True
+    remove_tags = [dict(name='hr')]
     feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]

     def print_version(self, url):
         return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(align=True):
+            del item['align']
+        return soup

recipes/biolog_pl.recipe (new file, 19 lines)
View File

@@ -0,0 +1,19 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class Biolog_pl(BasicNewsRecipe):
    title = u'Biolog.pl'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    __author__ = 'fenuks'
    description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
    category = 'biology'
    language = 'pl'
    cover_url = 'http://www.biolog.pl/naukowy,portal,biolog.png'
    no_stylesheets = True
    #keeps_only_tags=[dict(id='main')]
    remove_tags_before = dict(id='main')
    remove_tags_after = dict(name='a', attrs={'name':'komentarze'})
    remove_tags = [dict(name='img', attrs={'alt':'Komentarze'})]
    feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]

View File

@@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Birmingham post'
    description = 'News for Birmingham UK'
    timefmt = ''
    __author__ = 'Dave Asbury'
    cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
    oldest_article = 1
    max_articles_per_feed = 20
    remove_empty_feeds = True
    remove_javascript = True
    auto_cleanup = True
    language = 'en_GB'

    masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'

    keep_only_tags = [
        #dict(name='h1',attrs={'id' : 'article-headline'}),
        #dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
        #dict(name='p')
        #dict(attrs={'id' : 'three-col'})
    ]
    remove_tags = [
        # dict(name='div',attrs={'class' : 'span-33 last header-links'})
    ]

    feeds = [
        #(u'News',u'http://www.birminghampost.net/news/rss.xml'),
        (u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
        (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
        (u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
        (u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
    ]

    extra_css = '''
        body {font: sans-serif medium;}
        h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
        h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
        span{ font-size:9.5px; font-weight:bold;font-style:italic}
        p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
    '''

View File

@@ -0,0 +1,22 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class Computerworld_pl(BasicNewsRecipe):
    title = u'Computerworld.pl'
    __author__ = 'fenuks'
    description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
    category = 'IT'
    language = 'pl'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'id':'s'})]
    remove_tags_after = dict(name='div', attrs={'class':'rMobi'})
    remove_tags = [dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
    feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.computerworld.pl/')
        cover = soup.find(name='img', attrs={'class':'prawo'})
        self.cover_url = cover['src']
        return getattr(self, 'cover_url', self.cover_url)

View File

@@ -7,6 +7,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
     __author__ = 'Dave Asbury'
+    #last update 21/12/11
     # greyscale code by Starson
     cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
     no_stylesheets = True

@@ -31,8 +32,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
         dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
         dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
         dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
-        dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
-    ]
+        dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
+        dict(name='li',attrs={'class' : 'thumb'})
+    ]

     feeds = [
         (u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]

@@ -48,4 +50,3 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
             img.type = "GrayscaleType"
             img.save(iurl)
         return soup
-

recipes/datasport.recipe (new file, 15 lines)
View File

@@ -0,0 +1,15 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian soccer news website - v1.00 (17, December 2011)'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1324114272(BasicNewsRecipe):
    title = u'Datasport'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = True
    feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]

View File

@@ -0,0 +1,58 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
import re

class Dziennik_pl(BasicNewsRecipe):
    title = u'Dziennik.pl'
    __author__ = 'fenuks'
    description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
    category = 'newspaper'
    language = 'pl'
    cover_url = 'http://6.s.dziennik.pl/images/og_dziennik.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript = True
    remove_empty_feeds = True
    preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
    keep_only_tags = [dict(id='article')]
    remove_tags = [dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
    feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
             (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
             (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
             (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
             (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
             (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
             (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
             (u'Muzyka', u'http://rss.dziennik.pl/Dziennik-Muzyka'),
             (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
             (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
             (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
             (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]

    def append_page(self, soup, appendtag):
        tag = soup.find('a', attrs={'class':'page_next'})
        if tag:
            appendtag.find('div', attrs={'class':'article_paginator'}).extract()
            while tag:
                soup2 = self.index_to_soup(tag['href'])
                tag = soup2.find('a', attrs={'class':'page_next'})
                if not tag:
                    for r in appendtag.findAll('div', attrs={'class':'art_src'}):
                        r.extract()
                pagetext = soup2.find(name='div', attrs={'class':'article_body'})
                for dictionary in self.remove_tags:
                    v = pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
                    for delete in v:
                        delete.extract()
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
            if appendtag.find('div', attrs={'class':'article_paginator'}):
                appendtag.find('div', attrs={'class':'article_paginator'}).extract()

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body)
        return soup
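append_page above stitches multi-page articles together by following the page_next link, copying each page's article_body into the first page and removing the paginator. A stripped-down sketch of the pattern; the 'next' and 'body' selectors are placeholders, not dziennik.pl's real class names:

# Hypothetical sketch of the page-stitching pattern; selector names are placeholders.
def append_page(self, soup, appendtag):
    tag = soup.find('a', attrs={'class': 'next'})
    while tag:
        soup2 = self.index_to_soup(tag['href'])                # fetch the next page
        pagetext = soup2.find('div', attrs={'class': 'body'})  # its article body
        appendtag.insert(len(appendtag.contents), pagetext)    # append to page one
        tag = soup2.find('a', attrs={'class': 'next'})         # loop until no next link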

View File

@@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'

'''
Fetch echo-online.de
'''

from calibre.web.feeds.recipes import BasicNewsRecipe

class Echo_Online(BasicNewsRecipe):
    title = u'Echo Online'
    description = '-Echo Online-'
    publisher = 'Echo Online GmbH'
    category = 'News, Germany'
    __author__ = 'Armin Geller'  # 2011-12-17
    language = 'de'
    lang = 'de-DE'
    encoding = 'iso-8859-1'
    timefmt = ' [%a, %d %b %Y]'

    oldest_article = 7
    max_articles_per_feed = 2
    no_stylesheets = True
    auto_cleanup = True
    remove_javascript = True

    feeds = [
        (u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
        (u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
        (u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
        (u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
        (u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
        (u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
        (u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
        (u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
        (u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
        (u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
        (u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
    ]

    def print_version(self, url):
        return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'

    remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]

    auto_cleanup_keep = '//div[@class="bild_gross w270"]'

    # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-ash2/41801_145340745513489_893927_n.jpg' # 2011-12-16 AGe
    cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif' # 2011-12-16 AGe

View File

@@ -0,0 +1,48 @@
################################################################################
#Description: http://es.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2010.12.01. - V1.0
################################################################################

from calibre.web.feeds.recipes import BasicNewsRecipe

class elet_es_irodalom(BasicNewsRecipe):
    title = u'Elet es Irodalom'
    __author__ = 'Bigpapa'
    oldest_article = 7
    max_articles_per_feed = 20  # Maximum number of articles per feed kept in the generated e-book.
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'iso-8859-2'
    category = 'Cikkek'
    language = 'hu'
    publication_type = 'newsportal'
    extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '

    keep_only_tags = [
        dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
    ]

    remove_tags = [
        dict(name='a', attrs={'target':['_TOP']}),
        dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
    ]

    feeds = [
        (u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
        (u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
        (u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
        (u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
        (u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
        (u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
        (u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
        (u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
        (u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
    ]

recipes/emuzica_pl.recipe (new file, 16 lines)
View File

@@ -0,0 +1,16 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class eMuzyka(BasicNewsRecipe):
    title = u'eMuzyka'
    __author__ = 'fenuks'
    description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
    category = 'music'
    language = 'pl'
    cover_url = 'http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
    remove_tags = [dict(name='span', attrs={'id':'date'})]
    feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]

recipes/fisco_oggi.recipe (new file, 18 lines)
View File

@@ -0,0 +1,18 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Website of the Italian Government Income Agency (about revenue, taxation, taxes) - v1.00 (17, December 2011)'

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1324112023(BasicNewsRecipe):
    title = u'Fisco Oggi'
    language = 'it'
    __author__ = 'faber1971'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_javascript = True
    no_stylesheets = True
    feeds = [(u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]

View File

@@ -1,57 +1,68 @@
-# -*- coding: utf-8 -*-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

-class Focus_pl(BasicNewsRecipe):
-    title = u'Focus.pl'
-    oldest_article = 15
-    max_articles_per_feed = 100
-    __author__ = 'fenuks'
-    language = 'pl'
-    description = 'polish scientific monthly magazine'
+class FocusRecipe(BasicNewsRecipe):
+    __license__ = 'GPL v3'
+    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    language = 'pl'
+    version = 1
+
+    title = u'Focus'
+    publisher = u'Gruner + Jahr Polska'
+    category = u'News'
+    description = u'Newspaper'
     category = 'magazine'
     cover_url = ''
     remove_empty_feeds = True
     no_stylesheets = True
-    remove_tags_before = dict(name='div', attrs={'class':'h2 h2f'})
-    remove_tags_after = dict(name='div', attrs={'class':'clear'})
-    feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
-             (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
-             (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
-             (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
-             (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
-             (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
-             (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
-             (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
-             (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
-            ]
+    oldest_article = 7
+    max_articles_per_feed = 100000
+    recursions = 0
+
+    no_stylesheets = True
+    remove_javascript = True
+    encoding = 'utf-8'
+    # Seems to work best, but YMMV
+    simultaneous_downloads = 5
+
+    r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
+    keep_only_tags = []
+    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
+
+    remove_tags = []
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
+    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
+
+    extra_css = '''
+        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+        h1{text-align: left;}
+        h2{font-size: medium; font-weight: bold;}
+        p.lead {font-weight: bold; text-align: left;}
+        .authordate {font-size: small; color: #696969;}
+        .fot{font-size: x-small; color: #666666;}
+    '''
+
+    feeds = [
+        ('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
+        ('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
+        ('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
+        ('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
+        ('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
+        ('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
+        ('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
+    ]

     def skip_ad_pages(self, soup):
-        tag = soup.find(name='a')
-        if tag:
-            new_soup = self.index_to_soup(tag['href'] + 'do-druku/1/', raw=True)
-            return new_soup
-
-    def append_page(self, appendtag):
-        tag = appendtag.find(name='div', attrs={'class':'arrows'})
-        if tag:
-            nexturl = 'http://www.focus.pl/' + tag.a['href']
-            for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
-                rem.extract()
-            while nexturl:
-                soup2 = self.index_to_soup(nexturl)
-                nexturl = None
-                pagetext = soup2.find(name='div', attrs={'class':'txt'})
-                tag = pagetext.find(name='div', attrs={'class':'arrows'})
-                for r in tag.findAll(name='a'):
-                    if u'Następne' in r.string:
-                        nexturl = 'http://www.focus.pl/' + r['href']
-                for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
-                    rem.extract()
-                pos = len(appendtag.contents)
-                appendtag.insert(pos, pagetext)
+        if ('advertisement' in soup.find('title').string.lower()):
+            href = soup.find('a').get('href')
+            return self.index_to_soup(href, raw=True)
+        else:
+            return None

     def get_cover_url(self):
         soup = self.index_to_soup('http://www.focus.pl/magazyn/')

@@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
             self.cover_url = 'http://www.focus.pl/' + tag.a['href']
         return getattr(self, 'cover_url', self.cover_url)

-    def preprocess_html(self, soup):
-        self.append_page(soup.body)
-        return soup
+    def print_version(self, url):
+        if url.count('focus.pl.feedsportal.com'):
+            u = url.find('focus0Bpl')
+            u = 'http://www.focus.pl/' + url[u + 11:]
+            u = u.replace('0C', '/')
+            u = u.replace('A', '')
+            u = u.replace('0E', '-')
+            u = u.replace('/nc/1//story01.htm', '/do-druku/1')
+        else:
+            u = url.replace('/nc/1', '/do-druku/1')
+        return u
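The new print_version rebuilds a clean focus.pl print URL from feedsportal's obfuscated slug by undoing its character encoding. A worked example on a made-up URL (real slugs differ), following exactly the replacements above:

# Worked example of the de-obfuscation above, on a hypothetical URL.
url = 'http://focus.pl.feedsportal.com/c/32992/f/532693/s/x/focus0Bpl0Cnauka0Cprzyklad0E10A/nc/1//story01.htm'
u = url.find('focus0Bpl')                  # locate the encoded host marker
u = 'http://www.focus.pl/' + url[u + 11:]  # skip past 'focus0Bpl0C' (11 chars)
u = u.replace('0C', '/')                   # '0C' encodes '/'
u = u.replace('A', '')                     # drop 'A' padding, so '0A' -> '0'
u = u.replace('0E', '-')                   # '0E' encodes '-'
u = u.replace('/nc/1//story01.htm', '/do-druku/1')
# u is now 'http://www.focus.pl/nauka/przyklad-10/do-druku/1'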

View File

@@ -1,4 +1,3 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-

 class GlasgowHerald(BasicNewsRecipe):

@@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
     language = 'en_GB'
     __author__ = 'Kovid Goyal'

-    keep_only_tags = [dict(attrs={'class':'article'})]
-    remove_tags = [
-        dict(id=['pic-nav']),
-        dict(attrs={'class':['comments-top']})
-    ]
+    use_embedded_content = False
+    no_stylesheets = True
+    auto_cleanup = True
+
+    #keep_only_tags = [dict(attrs={'class':'article'})]
+    #remove_tags = [
+        #dict(id=['pic-nav']),
+        #dict(attrs={'class':['comments-top']})
+    #]

     feeds = [

@@ -25,5 +28,4 @@ class GlasgowHerald(BasicNewsRecipe):
         (u'Arts & Entertainment',
          u'http://www.heraldscotland.com/cmlink/1.768',),
         (u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
-

View File

@@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
 from urlparse import urlparse
 import re

-class HackerNews(BasicNewsRecipe):
-    title = 'Hacker News'
-    __author__ = 'Tom Scholl'
+class HNWithCommentsLink(BasicNewsRecipe):
+    title = 'HN With Comments Link'
+    __author__ = 'Tom Scholl & David Kerschner'
     description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
     publisher = 'Y Combinator'
     category = 'news, programming, it, technology'

@@ -80,6 +80,11 @@ class HNWithCommentsLink(BasicNewsRecipe):
         body = body + comments
         return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'

+    def parse_feeds(self):
+        a = super(HNWithCommentsLink, self).parse_feeds()
+        self.hn_articles = a[0].articles
+        return a
+
     def get_obfuscated_article(self, url):
         if url.startswith('http://news.ycombinator.com'):
             content = self.get_hn_content(url)

@@ -97,6 +102,13 @@ class HNWithCommentsLink(BasicNewsRecipe):
         else:
             content = self.get_readable_content(url)

+        article = 0
+        for a in self.hn_articles:
+            if a.url == url:
+                article = a
+
+        content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'
+
         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
         self.temp_files[-1].write(content)
         self.temp_files[-1].close()
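The added lines splice the feed's summary into the downloaded page just before its closing tags, so the comments link survives into the article body. The same splice in isolation (append_summary is our name for it, not the recipe's):

import re

def append_summary(content, summary):
    # drop the trailing </body></html> (with optional whitespace in between),
    # append the summary, then close the document again
    return re.sub(r'</body>\s*</html>\s*$', '', content) + summary + '</body></html>'

# append_summary(u'<html><body><p>story</p></body></html>', u'<p>sum</p>')
# -> u'<html><body><p>story</p><p>sum</p></body></html>'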

View File

@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import urllib, re

 class HindustanTimes(BasicNewsRecipe):
     title = u'Hindustan Times'

@@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
          'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
     ]

+    def get_article_url(self, article):
+        '''
+        HT uses a variant of the feedportal RSS ad display mechanism
+        '''
+        try:
+            s = article.summary
+            return urllib.unquote(
+                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
+        except:
+            pass
+        url = BasicNewsRecipe.get_article_url(self, article)
+        res = self.browser.open_novisit(url)
+        url = res.geturl().split('/')[-2]
+        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
+                    'www.'}
+        for k, v in encoding.iteritems():
+            url = url.replace(k, v)
+        return url
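The fallback path decodes feedportal's obfuscated slug: the second-to-last path segment of the redirected URL is rewritten token by token with the table above. A hypothetical slug decoded with that same table:

# Hypothetical slug decoded with the recipe's token table above.
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S': 'www.'}
url = '0L0Shindustantimes0N0Cnews0Estory'
for k, v in encoding.iteritems():
    url = url.replace(k, v)
# url is now 'http://www.hindustantimes.com/news-story'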

View File

@@ -1,44 +1,58 @@
-# -*- coding: utf-8 -*-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-class HVG(BasicNewsRecipe):
-    title = 'HVG.HU'
-    __author__ = u'István Papp'
-    description = u'Friss hírek a HVG-től'
-    timefmt = ' [%Y. %b. %d., %a.]'
-    oldest_article = 4
-    language = 'hu'
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    use_embedded_content = False
-    encoding = 'utf8'
-    publisher = 'HVG Online'
-    category = u'news, hírek, hvg'
-    extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-    remove_tags_before = dict(id='pg-content')
-    remove_javascript = True
-    remove_empty_feeds = True
-
-    feeds = [
-        (u'Itthon', u'http://hvg.hu/rss/itthon')
-        ,(u'Világ', u'http://hvg.hu/rss/vilag')
-        ,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
-        ,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
-        ,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
-        ,(u'Karrier', u'http://hvg.hu/rss/karrier')
-        ,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
-        ,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
-        ,(u'Kultúra', u'http://hvg.hu/rss/kultura')
-        ,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
-        ,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
-        ,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
-        ,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
-        ,(u'Sport', u'http://hvg.hu/rss/sport')
-    ]
-
-    def print_version(self, url):
-        return url.replace('#rss', '/print')
+################################################################################
+#Description: http://hvg.hu/ RSS channel
+#Author: Bigpapa (bigpapabig@hotmail.com)
+#Date: 2011.12.20. - V1.1
+################################################################################
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class hvg(BasicNewsRecipe):
+    title = u'HVG'
+    __author__ = 'Bigpapa'
+    language = 'hu'
+    oldest_article = 5  # Maximum age, in days, of the oldest article to fetch.
+    max_articles_per_feed = 5  # Maximum number of articles per feed kept in the generated e-book.
+    no_stylesheets = True
+    encoding = 'utf8'
+    extra_css = ' h2 { font:bold 28px} '
+    remove_javascript = True
+    remove_empty_feeds = True
+
+    remove_attributes = ['style', 'font', 'href']
+
+    keep_only_tags = [
+        dict(name='div', attrs={'id':['pg-content']})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
+        dict(name='table', attrs={'class':['banner2', 'monocle']}),
+        dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
+        dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
+        dict(name='h3', attrs={'class':['hthree']}),
+        dict(name='ul', attrs={'class':['defaultul']}),
+        dict(name='form', attrs={'id':['commentForm']}),
+        dict(name='h6', attrs={'class':['hthree']}),
+        dict(name='h6', attrs={'class':['more2']}),
+        dict(name='img', attrs={'class':['framed']}),
+        dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
+    ]
+
+    feeds = [
+        # (u'\xd6sszes', 'http://hvg.hu/rss'),
+        (u'Itthon', 'http://hvg.hu/rss/itthon'),
+        (u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
+        (u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
+        (u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
+        (u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
+        (u'Karrier', 'http://hvg.hu/rss/karrier'),
+        (u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
+        (u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
+        (u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
+        (u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
+        (u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
+        (u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
+        (u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
+        (u'Sport', 'http://hvg.hu/rss/sport')
+    ]

BIN  recipes/icons/biolog_pl.png (new file, 1.2 KiB, binary not shown)
BIN  four more new recipe icons (373 B, 481 B, 1.2 KiB, 2.1 KiB, binaries not shown)
BIN  recipes/icons/moneynews.png (new file, 914 B, binary not shown)

View File

@@ -109,7 +109,6 @@ class TheIndependentNew(BasicNewsRecipe):
             picdiv = soup.find('img')
             if picdiv is not None:
                 self.add_toc_thumbnail(article,picdiv['src'])
-

     def preprocess_html(self, soup):

@@ -273,12 +272,15 @@ class TheIndependentNew(BasicNewsRecipe):
     def _insertRatingStars(self,soup,item):
-        if item.contents is None:
+        if item.contents is None or len(item.contents) < 1:
             return
         rating = item.contents[0]
-        if not rating.isdigit():
-            return None
-        rating = int(item.contents[0])
+
+        try:
+            rating = float(item.contents[0])
+        except:
+            print 'Could not convert decimal rating to star: malformatted float.'
+            return
+
         for i in range(1,6):
             star = Tag(soup,'img')
             if i <= rating:

View File

@@ -0,0 +1,14 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe

class Kosmonauta(BasicNewsRecipe):
    title = u'Kosmonauta.net'
    __author__ = 'fenuks'
    description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
    category = 'astronomy'
    language = 'pl'
    cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html')]

View File

@@ -10,6 +10,10 @@ __MakePeriodical__ = True
 __UseChineseTitle__ = False
 # Set it to False if you want to skip images (Default: True)
 __KeepImages__ = True
+# Set it to True if you want to include a summary in Kindle's article view (Default: False)
+__IncludeSummary__ = False
+# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
+__IncludeThumbnails__ = True
 # (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
 __UseLife__ = True
 # (HK only) It is to disable premium content (Default: False)
@@ -24,12 +28,15 @@ __Date__ = ''
 '''
 Change Log:
+2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
+            from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
+            download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
 2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
 2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
 2011/10/19: fix a bug in txt source parsing
 2011/10/17: disable fetching of premium content, also improved txt source parsing
 2011/10/04: option to get hi-res photos for the articles
 2011/09/21: fetching "column" section is made optional.
 2011/09/18: parse "column" section stuff from source text file directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@@ -53,6 +60,7 @@ Change Log:
 2010/10/31: skip repeated articles in section pages
 '''

+from calibre.utils.date import now as nowf
 import os, datetime, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested

@@ -60,11 +68,15 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.localization import canonicalize_lang

 # MAIN CLASS
 class MPRecipe(BasicNewsRecipe):
     if __Region__ == 'Hong Kong':
-        title = 'Ming Pao - Hong Kong'
+        if __UseChineseTitle__ == True:
+            title = u'\u660e\u5831 (\u9999\u6e2f)'
+        else:
+            title = 'Ming Pao - Hong Kong'
         description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
         category = 'Chinese, News, Hong Kong'
         extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
@@ -109,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
                                           lambda match: "</b>")
                              ]
     elif __Region__ == 'Vancouver':
-        title = 'Ming Pao - Vancouver'
+        if __UseChineseTitle__ == True:
+            title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
+        else:
+            title = 'Ming Pao - Vancouver'
         description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
         category = 'Chinese, News, Vancouver'
         extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'

@@ -127,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
                                           lambda match: ''),
                              ]
     elif __Region__ == 'Toronto':
-        title = 'Ming Pao - Toronto'
+        if __UseChineseTitle__ == True:
+            title = u'\u660e\u5831 (\u591a\u502b\u591a)'
+        else:
+            title = 'Ming Pao - Toronto'
         description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
         category = 'Chinese, News, Toronto'
         extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -161,9 +179,9 @@ class MPRecipe(BasicNewsRecipe):
     def get_dtlocal(self):
         dt_utc = datetime.datetime.utcnow()
         if __Region__ == 'Hong Kong':
-            # convert UTC to local hk time - at HKT 5.30am, all news are available
-            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
-            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
+            # convert UTC to local hk time - at HKT 4.30am, all news are available
+            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
+            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
         elif __Region__ == 'Vancouver':
             # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
             dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
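The timedelta pair implements the 4.30am cut-over: shift UTC into Hong Kong time, then subtract the hour at which a full day's news becomes available. A worked example with a hypothetical clock time:

# Worked example of the cut-over arithmetic above (hypothetical time).
import datetime
dt_utc = datetime.datetime(2011, 12, 22, 20, 30)  # 20:30 UTC
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# +8h   -> 04:30 on 23 Dec, Hong Kong time
# -4.5h -> 00:00 on 23 Dec, so get_dtlocal() only reports the new date
# once it is past 4.30am HKT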
@@ -186,6 +204,18 @@ class MPRecipe(BasicNewsRecipe):
         else:
             return self.get_dtlocal().strftime("%Y-%m-%d")

+    def get_fetchyear(self):
+        if __Date__ <> '':
+            return __Date__[0:4]
+        else:
+            return self.get_dtlocal().strftime("%Y")
+
+    def get_fetchmonth(self):
+        if __Date__ <> '':
+            return __Date__[4:6]
+        else:
+            return self.get_dtlocal().strftime("%m")
+
     def get_fetchday(self):
         if __Date__ <> '':
             return __Date__[6:8]
@@ -237,7 +267,7 @@ class MPRecipe(BasicNewsRecipe):
                 articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))

             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                 articles = self.parse_section(url)

@@ -274,7 +304,7 @@ class MPRecipe(BasicNewsRecipe):
                 articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))

             #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
             #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
             #    articles = self.parse_section(url)

@@ -291,7 +321,7 @@ class MPRecipe(BasicNewsRecipe):
             articles = self.parse_section2_txt(url, keystr)
             if articles:
                 feeds.append((title, articles))

         if __InclPremium__ == True:
             # parse column section articles directly from .txt files
             for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')

@@ -299,7 +329,7 @@ class MPRecipe(BasicNewsRecipe):
                 articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))

             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                 articles = self.parse_section(url)

@@ -379,7 +409,7 @@ class MPRecipe(BasicNewsRecipe):
                 title = self.tag_to_string(i)
                 url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
                 if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                     try:
                         br.open_novisit(url)
                         url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')  # use printed version of the article
                         current_articles.append({'title': title, 'url': url, 'description': ''})

@@ -406,7 +436,7 @@ class MPRecipe(BasicNewsRecipe):
                     included_urls.append(url)
         current_articles.reverse()
         return current_articles

     # parse from www.mingpaovan.com
     def parse_section3(self, url, baseUrl):
         self.get_fetchdate()

@@ -528,7 +558,7 @@ class MPRecipe(BasicNewsRecipe):
                 photo = photo.replace('class="photo"', '')
                 new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                 new_html = new_raw_html + '</body></html>'
         else:
             # .txt based file
             splitter = re.compile(r'\n')  # Match non-digits
             new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
@@ -591,23 +621,23 @@ class MPRecipe(BasicNewsRecipe):
             #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
             if __HiResImg__ == True:
                 # TODO: add a _ in front of an image url
                 if url.rfind('news.mingpao.com') > -1:
                     imglist = re.findall('src="?.*?jpg"', new_html)
                     br = mechanize.Browser()
                     br.set_handle_redirect(False)
                     for img in imglist:
                         gifimg = img.replace('jpg"', 'gif"')
                         try:
                             br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                             new_html = new_html.replace(img, gifimg)
                         except:
                             # find the location of the first _
                             pos = img.find('_')
                             if pos > -1:
                                 # if found, insert _ after the first _
                                 newimg = img[0:pos] + '_' + img[pos:]
                                 new_html = new_html.replace(img, newimg)
                             else:
                                 # if not found, insert _ after "
                                 new_html = new_html.replace(img[1:], '"_' + img[1:])
                 elif url.rfind('life.mingpao.com') > -1:

@@ -644,7 +674,7 @@ class MPRecipe(BasicNewsRecipe):
                         #print 'Use hi-res img', newimg
                         new_html = new_html.replace(img, newimg)
         return new_html

     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
@ -653,78 +683,154 @@ class MPRecipe(BasicNewsRecipe):
for item in soup.findAll(stype=True): for item in soup.findAll(stype=True):
del item['absmiddle'] del item['absmiddle']
return soup return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# override from the one in version 0.8.31
     def create_opf(self, feeds, dir=None):
         if dir is None:
             dir = self.output_dir
-        if __UseChineseTitle__ == True:
-            if __Region__ == 'Hong Kong':
-                title = u'\u660e\u5831 (\u9999\u6e2f)'
-            elif __Region__ == 'Vancouver':
-                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
-            elif __Region__ == 'Toronto':
-                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
-        else:
-            title = self.short_title()
-        # if not generating a periodical, force date to apply in title
-        if __MakePeriodical__ == False:
+        title = self.short_title()
+        # change 1: allow our own flag to tell if a periodical is to be generated
+        # also use customed date instead of current time
+        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
             title = title + ' ' + self.get_fetchformatteddate()
-        if True:
-            mi = MetaInformation(title, [self.publisher])
-            mi.publisher = self.publisher
-            mi.author_sort = self.publisher
-            if __MakePeriodical__ == True:
-                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
-            else:
-                mi.publication_type = self.publication_type+':'+self.short_title()
-            #mi.timestamp = nowf()
-            mi.timestamp = self.get_dtlocal()
-            mi.comments = self.description
-            if not isinstance(mi.comments, unicode):
-                mi.comments = mi.comments.decode('utf-8', 'replace')
-            #mi.pubdate = nowf()
-            mi.pubdate = self.get_dtlocal()
-            opf_path = os.path.join(dir, 'index.opf')
-            ncx_path = os.path.join(dir, 'index.ncx')
-            opf = OPFCreator(dir, mi)
-            # Add mastheadImage entry to <guide> section
-            mp = getattr(self, 'masthead_path', None)
-            if mp is not None and os.access(mp, os.R_OK):
-                from calibre.ebooks.metadata.opf2 import Guide
-                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
-                ref.type = 'masthead'
-                ref.title = 'Masthead Image'
-                opf.guide.append(ref)
-            manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
-            manifest.append(os.path.join(dir, 'index.html'))
-            manifest.append(os.path.join(dir, 'index.ncx'))
-            # Get cover
-            cpath = getattr(self, 'cover_path', None)
-            if cpath is None:
-                pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
-                if self.default_cover(pf):
-                    cpath = pf.name
-            if cpath is not None and os.access(cpath, os.R_OK):
-                opf.cover = cpath
-                manifest.append(cpath)
-            # Get masthead
-            mpath = getattr(self, 'masthead_path', None)
-            if mpath is not None and os.access(mpath, os.R_OK):
-                manifest.append(mpath)
-            opf.create_manifest_from_files_in(manifest)
-            for mani in opf.manifest:
-                if mani.path.endswith('.ncx'):
-                    mani.id = 'ncx'
-                if mani.path.endswith('mastheadImage.jpg'):
-                    mani.id = 'masthead-image'
-            entries = ['index.html']
-            toc = TOC(base_path=dir)
-            self.play_order_counter = 0
-            self.play_order_map = {}
+        # end of change 1
+        # change 2: __appname__ replaced by newspaper publisher
+        __appname__ = self.publisher
+        mi = MetaInformation(title, [__appname__])
+        mi.publisher = __appname__
+        mi.author_sort = __appname__
+        # change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
+        if __MakePeriodical__ == True:
+            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        else:
+            mi.publication_type = self.publication_type+':'+self.short_title()
+        #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        # change 4: in the following, all the nowf() are changed to adjusted time
+        # This one doesn't matter
+        mi.timestamp = nowf()
+        # change 5: skip listing the articles
+        #article_titles, aseen = [], set()
+        #for f in feeds:
+        #    for a in f:
+        #        if a.title and a.title not in aseen:
+        #            aseen.add(a.title)
+        #            article_titles.append(force_unicode(a.title, 'utf-8'))
+        #mi.comments = self.description
+        #if not isinstance(mi.comments, unicode):
+        #    mi.comments = mi.comments.decode('utf-8', 'replace')
+        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+        #                '\n\n'.join(article_titles))
+        language = canonicalize_lang(self.language)
+        if language is not None:
+            mi.language = language
+        # This one affects the pub date shown in kindle title
+        #mi.pubdate = nowf()
+        # now appears to need the time field to be > 12.00noon as well
+        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+        opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')
+        opf = OPFCreator(dir, mi)
+        # Add mastheadImage entry to <guide> section
+        mp = getattr(self, 'masthead_path', None)
+        if mp is not None and os.access(mp, os.R_OK):
+            from calibre.ebooks.metadata.opf2 import Guide
+            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+            ref.type = 'masthead'
+            ref.title = 'Masthead Image'
+            opf.guide.append(ref)
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
+        manifest.append(os.path.join(dir, 'index.ncx'))
+        # Get cover
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is None:
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+            if self.default_cover(pf):
+                cpath = pf.name
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+            manifest.append(cpath)
+        # Get masthead
+        mpath = getattr(self, 'masthead_path', None)
+        if mpath is not None and os.access(mpath, os.R_OK):
+            manifest.append(mpath)
+        opf.create_manifest_from_files_in(manifest)
+        for mani in opf.manifest:
+            if mani.path.endswith('.ncx'):
+                mani.id = 'ncx'
+            if mani.path.endswith('mastheadImage.jpg'):
+                mani.id = 'masthead-image'
+        entries = ['index.html']
+        toc = TOC(base_path=dir)
+        self.play_order_counter = 0
+        self.play_order_map = {}
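A short note on the pubdate line above (dates illustrative): per the author's own comment, the timestamp appears to need an afternoon time component for the device to show the intended issue date.
    #   mi.pubdate = datetime.datetime(2011, 12, 23, 12, 30, 0)   # shows as the 23 Dec 2011 issue;
    #   with a time at or before noon the device may display the previous day instead.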
         def feed_index(num, parent):
             f = feeds[num]
@@ -739,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                     desc = None
                 else:
                     desc = self.description_limiter(desc)
+                tt = a.toc_thumbnail if a.toc_thumbnail else None
                 entries.append('%sindex.html'%adir)
                 po = self.play_order_map.get(entries[-1], None)
                 if po is None:
                     self.play_order_counter += 1
                     po = self.play_order_counter
-                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
-                    play_order=po, author=auth, description=desc)
+                parent.add_item('%sindex.html'%adir, None,
+                        a.title if a.title else _('Untitled Article'),
+                        play_order=po, author=auth,
+                        description=desc, toc_thumbnail=tt)
                 last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                 for sp in a.sub_pages:
                     prefix = os.path.commonprefix([opf_path, sp])
@@ -762,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                     prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                     templ = self.navbar.generate(True, num, j, len(f),
                                     not self.has_single_feed,
-                                    a.orig_url, self.publisher, prefix=prefix,
+                                    a.orig_url, __appname__, prefix=prefix,
                                     center=self.center_navbar)
                     elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                     body.insert(len(body.contents), elem)
@@ -785,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                 if not desc:
                     desc = None
                 feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                     f.title, play_order=po, description=desc, author=auth))
             else:
                 entries.append('feed_%d/index.html'%0)
@@ -799,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
         with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
             opf.render(opf_file, ncx_file)

View File
@@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
 # Region - Hong Kong, Vancouver, Toronto
 __Region__ = 'Toronto'
 # Users of Kindle 3 with limited system-level CJK support
-# please replace the following "True" with "False".
+# please replace the following "True" with "False". (Default: True)
 __MakePeriodical__ = True
-# Turn below to true if your device supports display of CJK titles
+# Turn below to True if your device supports display of CJK titles (Default: False)
 __UseChineseTitle__ = False
-# Set it to False if you want to skip images
+# Set it to False if you want to skip images (Default: True)
 __KeepImages__ = True
-# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
+# Set it to True if you want to include a summary in Kindle's article view (Default: False)
+__IncludeSummary__ = False
+# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
+__IncludeThumbnails__ = True
+# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
 __UseLife__ = True
+# (HK only) Set it to True to include premium content (Default: False)
+__InclPremium__ = False
+# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
+__ParsePFF__ = True
+# (HK only) Turn below to True if you wish hi-res images (Default: False)
+__HiResImg__ = False
+# Override the date returned by the program if specifying a YYYYMMDD below
+__Date__ = ''
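A minimal usage sketch (value hypothetical, not part of the commit): with the override set, the get_fetch* helpers defined below slice this string instead of reading the clock.
    #   __Date__ = '20111218'         # hypothetical: re-fetch the issue of 18 Dec 2011
    #   get_fetchdate()          ->  '20111218'
    #   get_fetchformatteddate() ->  '2011-12-18'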
 '''
 Change Log:
+2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
+            from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new-day
+            download of Hong Kong Ming Pao at 4.30am. Set the actual publication date shown on the Kindle device.
+2011/12/01: handle the situation that, in txt source parsing, the article content does not start with the special character u'\u3010'
+2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
+2011/10/19: fix a bug in txt source parsing
+2011/10/17: disable fetching of premium content, also improved txt source parsing
+2011/10/04: option to get hi-res photos for the articles
+2011/09/21: fetching "column" section is made optional.
+2011/09/18: parse "column" section stuff from source text file directly.
+2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
             provide options to remove all images in the file
 2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@@ -37,30 +60,38 @@ Change Log:
 2010/10/31: skip repeated articles in section pages
 '''
-import os, datetime, re
+from calibre.utils.date import now as nowf
+import os, datetime, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.localization import canonicalize_lang
 # MAIN CLASS
 class MPRecipe(BasicNewsRecipe):
     if __Region__ == 'Hong Kong':
-        title = 'Ming Pao - Hong Kong'
+        if __UseChineseTitle__ == True:
+            title = u'\u660e\u5831 (\u9999\u6e2f)'
+        else:
+            title = 'Ming Pao - Hong Kong'
         description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
         category = 'Chinese, News, Hong Kong'
-        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
+        extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
         masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
         keep_only_tags = [dict(name='h1'),
                           dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                           dict(name='font', attrs={'color':['AA0000']}), # for column articles title
+                          dict(attrs={'class':['heading']}), # for heading from txt
                           dict(attrs={'id':['newscontent']}), # entertainment and column page content
                           dict(attrs={'id':['newscontent01','newscontent02']}),
+                          dict(attrs={'class':['content']}), # for content from txt
                           dict(attrs={'class':['photo']}),
                           dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
-                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
+                          dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
+                          dict(attrs={'class':['images']}) # for images from txt
                           ]
         if __KeepImages__:
             remove_tags = [dict(name='style'),
@@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
                            lambda match: "</b>")
                           ]
     elif __Region__ == 'Vancouver':
-        title = 'Ming Pao - Vancouver'
+        if __UseChineseTitle__ == True:
+            title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
+        else:
+            title = 'Ming Pao - Vancouver'
         description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
         category = 'Chinese, News, Vancouver'
         extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
                            lambda match: ''),
                           ]
     elif __Region__ == 'Toronto':
-        title = 'Ming Pao - Toronto'
+        if __UseChineseTitle__ == True:
+            title = u'\u660e\u5831 (\u591a\u502b\u591a)'
+        else:
+            title = 'Ming Pao - Toronto'
         description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
         category = 'Chinese, News, Toronto'
         extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
     conversion_options = {'linearize_tables':True}
     timefmt = ''
-    def image_url_processor(cls, baseurl, url):
-        # trick: break the url at the first occurance of digit, add an additional
-        # '_' at the front
-        # not working, may need to move this to preprocess_html() method
-        # minIdx = 10000
-        # i0 = url.find('0')
-        # if i0 >= 0 and i0 < minIdx:
-        #     minIdx = i0
-        # i1 = url.find('1')
-        # if i1 >= 0 and i1 < minIdx:
-        #     minIdx = i1
-        # i2 = url.find('2')
-        # if i2 >= 0 and i2 < minIdx:
-        #     minIdx = i2
-        # i3 = url.find('3')
-        # if i3 >= 0 and i0 < minIdx:
-        #     minIdx = i3
-        # i4 = url.find('4')
-        # if i4 >= 0 and i4 < minIdx:
-        #     minIdx = i4
-        # i5 = url.find('5')
-        # if i5 >= 0 and i5 < minIdx:
-        #     minIdx = i5
-        # i6 = url.find('6')
-        # if i6 >= 0 and i6 < minIdx:
-        #     minIdx = i6
-        # i7 = url.find('7')
-        # if i7 >= 0 and i7 < minIdx:
-        #     minIdx = i7
-        # i8 = url.find('8')
-        # if i8 >= 0 and i8 < minIdx:
-        #     minIdx = i8
-        # i9 = url.find('9')
-        # if i9 >= 0 and i9 < minIdx:
-        #     minIdx = i9
-        return url
     def get_dtlocal(self):
         dt_utc = datetime.datetime.utcnow()
         if __Region__ == 'Hong Kong':
-            # convert UTC to local hk time - at HKT 5.30am, all news are available
-            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
-            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
+            # convert UTC to local hk time - at HKT 4.30am, all news are available
+            dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
+            # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
         elif __Region__ == 'Vancouver':
             # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
             dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
         return dt_local
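A worked example of the Hong Kong branch above (times illustrative): the net shift is UTC +8h -4.5h = UTC+3.5h, so the computed fetch day only rolls over at 4.30am HKT.
    #   2011-12-22 21:00 UTC  ->  +3.5h  ->  2011-12-23 00:30, i.e. 05:00 HKT: fetches the Dec 23 issue
    #   2011-12-22 20:00 UTC  ->  +3.5h  ->  2011-12-22 23:30, i.e. 04:00 HKT: still the Dec 22 issue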
     def get_fetchdate(self):
-        return self.get_dtlocal().strftime("%Y%m%d")
+        if __Date__ <> '':
+            return __Date__
+        else:
+            return self.get_dtlocal().strftime("%Y%m%d")
     def get_fetchformatteddate(self):
-        return self.get_dtlocal().strftime("%Y-%m-%d")
+        if __Date__ <> '':
+            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%Y-%m-%d")
+    def get_fetchyear(self):
+        if __Date__ <> '':
+            return __Date__[0:4]
+        else:
+            return self.get_dtlocal().strftime("%Y")
+    def get_fetchmonth(self):
+        if __Date__ <> '':
+            return __Date__[4:6]
+        else:
+            return self.get_dtlocal().strftime("%m")
     def get_fetchday(self):
-        return self.get_dtlocal().strftime("%d")
+        if __Date__ <> '':
+            return __Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%d")
     def get_cover_url(self):
         if __Region__ == 'Hong Kong':
@@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
                           (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
                           (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                           (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
-                          (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
-                          (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
-                articles = self.parse_section2(url, keystr)
+                          (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
+                         ]:
+                if __InclPremium__ == True:
+                    articles = self.parse_section2_txt(url, keystr)
+                else:
+                    articles = self.parse_section2(url, keystr)
                 if articles:
                     feeds.append((title, articles))
+            if __InclPremium__ == True:
+                # parse column section articles directly from .txt files
+                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                          ]:
+                    articles = self.parse_section2_txt(url, keystr)
+                    if articles:
+                        feeds.append((title, articles))
             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                 articles = self.parse_section(url)
@@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
         else:
             for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                                (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                 articles = self.parse_section(url)
                 if articles:
                     feeds.append((title, articles))
             # special- editorial
-            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-            if ed_articles:
-                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+            #if ed_articles:
+            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
             for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
             # special - finance
             #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-            if fin_articles:
-                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
-            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-                articles = self.parse_section(url)
+            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+            #if fin_articles:
+            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
+                articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))
+            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+            #    articles = self.parse_section(url)
+            #    if articles:
+            #        feeds.append((title, articles))
             # special - entertainment
-            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-            if ent_articles:
-                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+            #if ent_articles:
+            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
+                                      ]:
+                articles = self.parse_section2_txt(url, keystr)
+                if articles:
+                    feeds.append((title, articles))
+            if __InclPremium__ == True:
+                # parse column section articles directly from .txt files
+                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                          ]:
+                    articles = self.parse_section2_txt(url, keystr)
+                    if articles:
+                        feeds.append((title, articles))
             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))
-            # special- columns
-            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-            if col_articles:
-                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
         elif __Region__ == 'Vancouver':
             for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                                (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
                     title = self.tag_to_string(a)
                     url = a.get('href', False)
                     url = 'http://news.mingpao.com/' + dateStr + '/' +url
+                    # replace the url to the print-friendly version
+                    if __ParsePFF__ == True:
+                        if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
+                            url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
+                            url = re.sub('%2F.*%2F', '/', url)
+                            title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
+                            url = url.replace('%2Etxt', '_print.htm')
+                            url = url.replace('%5F', '_')
+                        else:
+                            url = url.replace('.htm', '_print.htm')
                     if url not in included_urls and url.rfind('Redirect') == -1:
                         current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                         included_urls.append(url)
@@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):
     # parse from life.mingpao.com
     def parse_section2(self, url, keystr):
+        br = mechanize.Browser()
+        br.set_handle_redirect(False)
         self.get_fetchdate()
         soup = self.index_to_soup(url)
         a = soup.findAll('a', href=True)
@@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
             title = self.tag_to_string(i)
             url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
             if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
-                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
+                try:
+                    br.open_novisit(url)
+                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
+                    current_articles.append({'title': title, 'url': url, 'description': ''})
+                    included_urls.append(url)
+                except:
+                    print 'skipping a premium article'
+        current_articles.reverse()
+        return current_articles
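How the premium check above is meant to work (a sketch of the mechanism; that the site answers premium links with a redirect is an assumption drawn from the code):
    #   br.set_handle_redirect(False)  # any 3xx response raises instead of being followed
    #   br.open_novisit(url)           # premium articles redirect to a payment page, so this
    #                                  # throws and the except branch skips the article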
+
+    # parse from text file of life.mingpao.com
+    def parse_section2_txt(self, url, keystr):
+        self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        a = soup.findAll('a', href=True)
+        a.reverse()
+        current_articles = []
+        included_urls = []
+        for i in a:
+            title = self.tag_to_string(i)
+            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
+                url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
                 current_articles.append({'title': title, 'url': url, 'description': ''})
                 included_urls.append(url)
@@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
         current_articles.reverse()
         return current_articles
+    # preprocess those .txt and javascript based files
+    def preprocess_raw_html(self, raw_html, url):
+        new_html = raw_html
+        if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
+            if url.rfind('_print.htm') <> -1:
+                # javascript based file
+                splitter = re.compile(r'\n')
+                new_raw_html = '<html><head><title>Untitled</title></head>'
+                new_raw_html = new_raw_html + '<body>'
+                for item in splitter.split(raw_html):
+                    if item.startswith('var heading1 ='):
+                        heading = item.replace('var heading1 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="heading">' + heading
+                    if item.startswith('var heading2 ='):
+                        heading = item.replace('var heading2 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        if heading <> '':
+                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
+                        else:
+                            new_raw_html = new_raw_html + '</div>'
+                    if item.startswith('var content ='):
+                        content = item.replace("var content = ", '')
+                        content = content.replace('\'', '')
+                        content = content.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
+                    if item.startswith('var photocontent ='):
+                        photo = item.replace('var photocontent = \'', '')
+                        photo = photo.replace('\'', '')
+                        photo = photo.replace(';', '')
+                        photo = photo.replace('<tr>', '')
+                        photo = photo.replace('<td>', '')
+                        photo = photo.replace('</tr>', '')
+                        photo = photo.replace('</td>', '<br>')
+                        photo = photo.replace('class="photo"', '')
+                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
+                new_html = new_raw_html + '</body></html>'
+            else:
+                # .txt based file
+                splitter = re.compile(r'\n')  # split at line breaks
+                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
+                next_is_img_txt = False
+                title_started = False
+                title_break_reached = False
+                met_article_start_char = False
+                for item in splitter.split(raw_html):
+                    item = item.strip()
+                    # if title already reached but break between title and content not yet found, record title_break_reached
+                    if title_started == True and title_break_reached == False and item == '':
+                        title_break_reached = True
+                    # if title reached and title_break_reached and met_article_start_char == False and item is not empty,
+                    # start content
+                    elif title_started == True and title_break_reached == True and met_article_start_char == False:
+                        if item <> '':
+                            met_article_start_char = True
+                            new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
+                            #if item.startswith(u'\u3010'):
+                            #    met_article_start_char = True
+                            #    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
+                    else:
+                        if next_is_img_txt == False:
+                            if item.startswith("=@"):
+                                print 'skip movie link'
+                            elif item.startswith("=?"):
+                                next_is_img_txt = True
+                                new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
+                            elif item.startswith('=='):
+                                next_is_img_txt = True
+                                if False:
+                                    # TODO: check existence of .gif first
+                                    newimg = '_' + item[2:].strip() + '.jpg'
+                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
+                                else:
+                                    new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
+                            elif item.startswith('='):
+                                next_is_img_txt = True
+                                if False:
+                                    # TODO: check existence of .gif first
+                                    newimg = '_' + item[1:].strip() + '.jpg'
+                                    new_raw_html += '<img src="' + newimg + '" /><p>\n'
+                                else:
+                                    new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
+                            else:
+                                if next_is_img_txt == False and met_article_start_char == False:
+                                    if item <> '':
+                                        if title_started == False:
+                                            #print 'Title started at ', item
+                                            new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
+                                            title_started = True
+                                        else:
+                                            new_raw_html = new_raw_html + item + '\n'
+                                    else:
+                                        new_raw_html = new_raw_html + item + '<p>\n'
+                        else:
+                            next_is_img_txt = False
+                            new_raw_html = new_raw_html + item + '\n'
+                new_html = new_raw_html + '</div></body></html>'
+        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
+        if __HiResImg__ == True:
+            # TODO: add a _ in front of an image url
+            if url.rfind('news.mingpao.com') > -1:
+                imglist = re.findall('src="?.*?jpg"', new_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                for img in imglist:
+                    gifimg = img.replace('jpg"', 'gif"')
+                    try:
+                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        new_html = new_html.replace(img, gifimg)
+                    except:
+                        # find the location of the first _
+                        pos = img.find('_')
+                        if pos > -1:
+                            # if found, insert _ after the first _
+                            newimg = img[0:pos] + '_' + img[pos:]
+                            new_html = new_html.replace(img, newimg)
+                        else:
+                            # if not found, insert _ after "
+                            new_html = new_html.replace(img[1:], '"_' + img[1:])
+            elif url.rfind('life.mingpao.com') > -1:
+                imglist = re.findall('src=\'?.*?jpg\'', new_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                #print 'Img list: ', imglist, '\n'
+                for img in imglist:
+                    #print 'Found img: ', img
+                    gifimg = img.replace('jpg\'', 'gif\'')
+                    try:
+                        gifurl = re.sub(r'dailynews.*txt', '', url)
+                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                        new_html = new_html.replace(img, gifimg)
+                    except:
+                        pos = img.rfind('/')
+                        newimg = img[0:pos+1] + '_' + img[pos+1:]
+                        new_html = new_html.replace(img, newimg)
+                # repeat with src quoted by double quotes, for text parsed from src txt
+                imglist = re.findall('src="?.*?jpg"', new_html)
+                for img in imglist:
+                    #print 'Found img: ', img
+                    gifimg = img.replace('jpg"', 'gif"')
+                    try:
+                        #print 'url', url
+                        pos = url.rfind('/')
+                        gifurl = url[:pos+1]
+                        #print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
+                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                        new_html = new_html.replace(img, gifimg)
+                    except:
+                        pos = img.find('"')
+                        newimg = img[0:pos+1] + '_' + img[pos+1:]
+                        #print 'Use hi-res img', newimg
+                        new_html = new_html.replace(img, newimg)
+        return new_html
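One concrete (hypothetical) pass of the hi-res rewrite above, for a news.mingpao.com page:
    #   img reference found:  src="20111223/photo1.jpg"
    #   1. probe url + "/../20111223/photo1.gif" with open_novisit
    #   2. if the probe succeeds, point the tag at the .gif
    #   3. if it raises, fall back to the hi-res naming convention: src="_20111223/photo1.jpg"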
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
@@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
             del item['absmiddle']
         return soup
+    def populate_article_metadata(self, article, soup, first):
+        # thumbnails shouldn't be available if using hi-res images
+        if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
+            img = soup.find('img')
+            if img is not None:
+                self.add_toc_thumbnail(article, img['src'])
+        try:
+            if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
+                # look for content
+                articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
+                if not articlebodies:
+                    articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
+                if not articlebodies:
+                    articlebodies = soup.findAll('div',attrs={'class':'content'})
+                if not articlebodies:
+                    articlebodies = soup.findAll('div', attrs={'id':'font'})
+                if articlebodies:
+                    for articlebody in articlebodies:
+                        if articlebody:
+                            # the text may or may not be enclosed in <p></p> tag
+                            paras = articlebody.findAll('p')
+                            if not paras:
+                                paras = articlebody
+                            textFound = False
+                            for p in paras:
+                                if not textFound:
+                                    summary_candidate = self.tag_to_string(p).strip()
+                                    summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
+                                    if len(summary_candidate) > 0:
+                                        article.summary = article.text_summary = summary_candidate
+                                        textFound = True
+            else:
+                # display a simple text
+                #article.summary = article.text_summary = u'\u66f4\u591a......'
+                # display word counts
+                counts = 0
+                articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
+                if not articlebodies:
+                    articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
+                if not articlebodies:
+                    articlebodies = soup.findAll('div',attrs={'class':'content'})
+                if not articlebodies:
+                    articlebodies = soup.findAll('div', attrs={'id':'font'})
+                if articlebodies:
+                    for articlebody in articlebodies:
+                        # the text may or may not be enclosed in <p></p> tag
+                        paras = articlebody.findAll('p')
+                        if not paras:
+                            paras = articlebody
+                        for p in paras:
+                            summary_candidate = self.tag_to_string(p).strip()
+                            counts += len(summary_candidate)
+                    article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
+        except:
+            self.log("Error creating article descriptions")
+            return
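For reference, the fallback summary assembled above is a full-width character count shown in the Kindle article view when no text summary was extracted:
    #   u'\uff08' + str(1234) + u'\u5b57\uff09'  ->  u'（1234字）', i.e. "(1234 characters)"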
+    # override from the one in version 0.8.31
     def create_opf(self, feeds, dir=None):
         if dir is None:
             dir = self.output_dir
-        if __UseChineseTitle__ == True:
-            if __Region__ == 'Hong Kong':
-                title = u'\u660e\u5831 (\u9999\u6e2f)'
-            elif __Region__ == 'Vancouver':
-                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
-            elif __Region__ == 'Toronto':
-                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
-        else:
-            title = self.short_title()
-        # if not generating a periodical, force date to apply in title
-        if __MakePeriodical__ == False:
+        title = self.short_title()
+        # change 1: allow our own flag to tell if a periodical is to be generated
+        # also use customed date instead of current time
+        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
             title = title + ' ' + self.get_fetchformatteddate()
-        if True:
-            mi = MetaInformation(title, [self.publisher])
-            mi.publisher = self.publisher
-            mi.author_sort = self.publisher
-            if __MakePeriodical__ == True:
-                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
-            else:
-                mi.publication_type = self.publication_type+':'+self.short_title()
-            #mi.timestamp = nowf()
-            mi.timestamp = self.get_dtlocal()
-            mi.comments = self.description
-            if not isinstance(mi.comments, unicode):
-                mi.comments = mi.comments.decode('utf-8', 'replace')
-            #mi.pubdate = nowf()
-            mi.pubdate = self.get_dtlocal()
-            opf_path = os.path.join(dir, 'index.opf')
-            ncx_path = os.path.join(dir, 'index.ncx')
-            opf = OPFCreator(dir, mi)
-            # Add mastheadImage entry to <guide> section
-            mp = getattr(self, 'masthead_path', None)
-            if mp is not None and os.access(mp, os.R_OK):
-                from calibre.ebooks.metadata.opf2 import Guide
-                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
-                ref.type = 'masthead'
-                ref.title = 'Masthead Image'
-                opf.guide.append(ref)
-            manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
-            manifest.append(os.path.join(dir, 'index.html'))
-            manifest.append(os.path.join(dir, 'index.ncx'))
-            # Get cover
-            cpath = getattr(self, 'cover_path', None)
-            if cpath is None:
-                pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
-                if self.default_cover(pf):
-                    cpath = pf.name
-            if cpath is not None and os.access(cpath, os.R_OK):
-                opf.cover = cpath
-                manifest.append(cpath)
-            # Get masthead
-            mpath = getattr(self, 'masthead_path', None)
-            if mpath is not None and os.access(mpath, os.R_OK):
-                manifest.append(mpath)
-            opf.create_manifest_from_files_in(manifest)
-            for mani in opf.manifest:
-                if mani.path.endswith('.ncx'):
-                    mani.id = 'ncx'
-                if mani.path.endswith('mastheadImage.jpg'):
-                    mani.id = 'masthead-image'
-            entries = ['index.html']
-            toc = TOC(base_path=dir)
-            self.play_order_counter = 0
-            self.play_order_map = {}
+        # end of change 1
+        # change 2: __appname__ replaced by newspaper publisher
+        __appname__ = self.publisher
+        mi = MetaInformation(title, [__appname__])
+        mi.publisher = __appname__
+        mi.author_sort = __appname__
+        # change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
+        if __MakePeriodical__ == True:
+            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        else:
+            mi.publication_type = self.publication_type+':'+self.short_title()
+        #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        # change 4: in the following, all the nowf() are changed to adjusted time
+        # This one doesn't matter
+        mi.timestamp = nowf()
+        # change 5: skip listing the articles
+        #article_titles, aseen = [], set()
+        #for f in feeds:
+        #    for a in f:
+        #        if a.title and a.title not in aseen:
+        #            aseen.add(a.title)
+        #            article_titles.append(force_unicode(a.title, 'utf-8'))
+        #mi.comments = self.description
+        #if not isinstance(mi.comments, unicode):
+        #    mi.comments = mi.comments.decode('utf-8', 'replace')
+        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+        #                '\n\n'.join(article_titles))
+        language = canonicalize_lang(self.language)
+        if language is not None:
+            mi.language = language
+        # This one affects the pub date shown in kindle title
+        #mi.pubdate = nowf()
+        # now appears to need the time field to be > 12.00noon as well
+        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+        opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')
+        opf = OPFCreator(dir, mi)
+        # Add mastheadImage entry to <guide> section
+        mp = getattr(self, 'masthead_path', None)
+        if mp is not None and os.access(mp, os.R_OK):
+            from calibre.ebooks.metadata.opf2 import Guide
+            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+            ref.type = 'masthead'
+            ref.title = 'Masthead Image'
+            opf.guide.append(ref)
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
+        manifest.append(os.path.join(dir, 'index.ncx'))
+        # Get cover
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is None:
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+            if self.default_cover(pf):
+                cpath = pf.name
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+            manifest.append(cpath)
+        # Get masthead
+        mpath = getattr(self, 'masthead_path', None)
+        if mpath is not None and os.access(mpath, os.R_OK):
+            manifest.append(mpath)
+        opf.create_manifest_from_files_in(manifest)
+        for mani in opf.manifest:
+            if mani.path.endswith('.ncx'):
+                mani.id = 'ncx'
+            if mani.path.endswith('mastheadImage.jpg'):
+                mani.id = 'masthead-image'
+        entries = ['index.html']
+        toc = TOC(base_path=dir)
+        self.play_order_counter = 0
+        self.play_order_map = {}
         def feed_index(num, parent):
             f = feeds[num]
@@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                     desc = None
                 else:
                     desc = self.description_limiter(desc)
+                tt = a.toc_thumbnail if a.toc_thumbnail else None
                 entries.append('%sindex.html'%adir)
                 po = self.play_order_map.get(entries[-1], None)
                 if po is None:
                     self.play_order_counter += 1
                     po = self.play_order_counter
-                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
-                    play_order=po, author=auth, description=desc)
+                parent.add_item('%sindex.html'%adir, None,
+                        a.title if a.title else _('Untitled Article'),
+                        play_order=po, author=auth,
+                        description=desc, toc_thumbnail=tt)
                 last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                 for sp in a.sub_pages:
                     prefix = os.path.commonprefix([opf_path, sp])
@@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                     prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                     templ = self.navbar.generate(True, num, j, len(f),
                                     not self.has_single_feed,
-                                    a.orig_url, self.publisher, prefix=prefix,
+                                    a.orig_url, __appname__, prefix=prefix,
                                     center=self.center_navbar)
                     elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                     body.insert(len(body.contents), elem)
@@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                 if not desc:
                     desc = None
                 feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                     f.title, play_order=po, description=desc, author=auth))
             else:
                 entries.append('feed_%d/index.html'%0)
@@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
         with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
             opf.render(opf_file, ncx_file)

View File
@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto # Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Vancouver' __Region__ = 'Vancouver'
# Users of Kindle 3 with limited system-level CJK support # Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False". # please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True __MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles # Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False __UseChineseTitle__ = False
# Set it to False if you want to skip images # Set it to False if you want to skip images (Default: True)
__KeepImages__ = True __KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source # Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True __UseLife__ = True
# (HK only) It is to disable premium content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''
''' '''
Change Log: Change Log:
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages 2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages 2010/10/31: skip repeated articles in section pages
''' '''
import os, datetime, re from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS # MAIN CLASS
class MPRecipe(BasicNewsRecipe): class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong': if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong' if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)' description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong' category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif' masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'), keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}), dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}), dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt
] ]
if __KeepImages__: if __KeepImages__:
remove_tags = [dict(name='style'), remove_tags = [dict(name='style'),
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>") lambda match: "</b>")
] ]
elif __Region__ == 'Vancouver': elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver' if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)' description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver' category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''), lambda match: ''),
] ]
elif __Region__ == 'Toronto': elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto' if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)' description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto' category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
conversion_options = {'linearize_tables':True} conversion_options = {'linearize_tables':True}
timefmt = '' timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurance of digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
def get_dtlocal(self): def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow() dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong': if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available # convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24) dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24) # dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver': elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available # convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24) dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
return dt_local return dt_local
def get_fetchdate(self): def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d") if __Date__ <> '':
return __Date__
else:
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self): def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d") if __Date__ <> '':
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self): def get_fetchday(self):
return self.get_dtlocal().strftime("%d") if __Date__ <> '':
return __Date__[6:8]
else:
return self.get_dtlocal().strftime("%d")
def get_cover_url(self): def get_cover_url(self):
if __Region__ == 'Hong Kong': if __Region__ == 'Hong Kong':
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'), (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'), (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'), (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'), (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]: ]:
articles = self.parse_section2(url, keystr) if __InclPremium__ == True:
articles = self.parse_section2_txt(url, keystr)
else:
articles = self.parse_section2(url, keystr)
if articles: if articles:
feeds.append((title, articles)) feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url) articles = self.parse_section(url)
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
         else:
             for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                                (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                 articles = self.parse_section(url)
                 if articles:
                     feeds.append((title, articles))

             # special- editorial
-            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-            if ed_articles:
-                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+            #if ed_articles:
+            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

             for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
             # special - finance
             #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-            if fin_articles:
-                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+            #if fin_articles:
+            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
+                articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))

-            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-                articles = self.parse_section(url)
+            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+            #    articles = self.parse_section(url)
+            #    if articles:
+            #        feeds.append((title, articles))

             # special - entertainment
-            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-            if ent_articles:
-                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+            #if ent_articles:
+            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
+                                       ]:
+                articles = self.parse_section2_txt(url, keystr)
+                if articles:
+                    feeds.append((title, articles))
+
+            if __InclPremium__ == True:
+                # parse column section articles directly from .txt files
+                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                           ]:
+                    articles = self.parse_section2_txt(url, keystr)
+                    if articles:
+                        feeds.append((title, articles))

             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))

-            # special- columns
-            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-            if col_articles:
-                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
-
         elif __Region__ == 'Vancouver':
             for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                                (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
             title = self.tag_to_string(a)
             url = a.get('href', False)
             url = 'http://news.mingpao.com/' + dateStr + '/' +url
+            # replace the url to the print-friendly version
+            if __ParsePFF__ == True:
+                if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
+                    url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
+                    url = re.sub('%2F.*%2F', '/', url)
+                    title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
+                    url = url.replace('%2Etxt', '_print.htm')
+                    url = url.replace('%5F', '_')
+                else:
+                    url = url.replace('.htm', '_print.htm')
             if url not in included_urls and url.rfind('Redirect') == -1:
                 current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                 included_urls.append(url)
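Illustration of the print-friendly rewrite above (URL hypothetical): the plain branch is a straight suffix swap, while premium redirect URLs are first collapsed by the two re.sub() calls before '%2Etxt' becomes '_print.htm':

    # 'http://news.mingpao.com/20111223/gaa1.htm'
    #     -> 'http://news.mingpao.com/20111223/gaa1_print.htm'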
@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):

     # parse from life.mingpao.com
     def parse_section2(self, url, keystr):
+        br = mechanize.Browser()
+        br.set_handle_redirect(False)
         self.get_fetchdate()
         soup = self.index_to_soup(url)
         a = soup.findAll('a', href=True)
@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
             title = self.tag_to_string(i)
             url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
             if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
-                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
+                try:
+                    br.open_novisit(url)
+                    url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
+                    current_articles.append({'title': title, 'url': url, 'description': ''})
+                    included_urls.append(url)
+                except:
+                    print 'skipping a premium article'
+        current_articles.reverse()
+        return current_articles
+
+    # parse from text file of life.mingpao.com
+    def parse_section2_txt(self, url, keystr):
+        self.get_fetchdate()
+        soup = self.index_to_soup(url)
+        a = soup.findAll('a', href=True)
+        a.reverse()
+        current_articles = []
+        included_urls = []
+        for i in a:
+            title = self.tag_to_string(i)
+            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
+                url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
                 current_articles.append({'title': title, 'url': url, 'description': ''})
                 included_urls.append(url)
         current_articles.reverse()
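Because the mechanize browser above was created with set_handle_redirect(False), br.open_novisit(url) raises on the redirect that paid content triggers, so the except branch silently skips premium articles. The .txt variant rewrites the CGI viewer URL into the raw text path, e.g. (file name hypothetical):

    # 'http://life.mingpao.com/cfm/dailynews3.cfm?File=20111223/misc/abc.txt'
    #     -> 'http://life.mingpao.com/ftp/Life3/20111223/misc/abc.txt'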
@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
         current_articles.reverse()
         return current_articles
# preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url):
new_html = raw_html
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
if url.rfind('_print.htm') <> -1:
# javascript based file
splitter = re.compile(r'\n')
new_raw_html = '<html><head><title>Untitled</title></head>'
new_raw_html = new_raw_html + '<body>'
for item in splitter.split(raw_html):
if item.startswith('var heading1 ='):
heading = item.replace('var heading1 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
new_raw_html = new_raw_html + '<div class="heading">' + heading
if item.startswith('var heading2 ='):
heading = item.replace('var heading2 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
if heading <> '':
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
else:
new_raw_html = new_raw_html + '</div>'
if item.startswith('var content ='):
content = item.replace("var content = ", '')
content = content.replace('\'', '')
content = content.replace(';', '')
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
if item.startswith('var photocontent ='):
photo = item.replace('var photocontent = \'', '')
photo = photo.replace('\'', '')
photo = photo.replace(';', '')
photo = photo.replace('<tr>', '')
photo = photo.replace('<td>', '')
photo = photo.replace('</tr>', '')
photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
# .txt based file
splitter = re.compile(r'\n') # split on newlines
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
# if title already reached but break between title and content not yet found, record title_break_reached
if title_started == True and title_break_reached == False and item == '':
title_break_reached = True
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
# start content
elif title_started == True and title_break_reached == True and met_article_start_char == False:
if item <> '':
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
#if item.startswith(u'\u3010'):
# met_article_start_char = True
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith("=@"):
print 'skip movie link'
elif item.startswith("=?"):
next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else:
if next_is_img_txt == False and met_article_start_char == False:
if item <> '':
if title_started == False:
#print 'Title started at ', item
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else:
new_raw_html = new_raw_html + item + '<p>\n'
else:
next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n'
new_html = new_raw_html + '</div></body></html>'
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
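A short sketch of the hi-res handling above (file names hypothetical): each image src is probed for a .gif sibling with redirects disabled, and when that probe raises, an underscore-prefixed hi-res .jpg is requested instead:

    # src='video1.jpg' -> try src='video1.gif'; on failure use src='_video1.jpg'
    # (the news.mingpao.com branch inserts the extra '_' at the first existing '_',
    #  or right after the opening quote when the name has none)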
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
             del item['absmiddle']
         return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# override from the one in version 0.8.31
     def create_opf(self, feeds, dir=None):
         if dir is None:
             dir = self.output_dir
-        if __UseChineseTitle__ == True:
-            if __Region__ == 'Hong Kong':
-                title = u'\u660e\u5831 (\u9999\u6e2f)'
-            elif __Region__ == 'Vancouver':
-                title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
-            elif __Region__ == 'Toronto':
-                title = u'\u660e\u5831 (\u591a\u502b\u591a)'
-        else:
-            title = self.short_title()
-        # if not generating a periodical, force date to apply in title
-        if __MakePeriodical__ == False:
+        title = self.short_title()
+        # change 1: allow our own flag to tell if a periodical is to be generated
+        # also use customed date instead of current time
+        if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
             title = title + ' ' + self.get_fetchformatteddate()
-        if True:
-            mi = MetaInformation(title, [self.publisher])
-            mi.publisher = self.publisher
-            mi.author_sort = self.publisher
-            if __MakePeriodical__ == True:
-                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
-            else:
-                mi.publication_type = self.publication_type+':'+self.short_title()
-            #mi.timestamp = nowf()
-            mi.timestamp = self.get_dtlocal()
-            mi.comments = self.description
-            if not isinstance(mi.comments, unicode):
-                mi.comments = mi.comments.decode('utf-8', 'replace')
-            #mi.pubdate = nowf()
-            mi.pubdate = self.get_dtlocal()
-            opf_path = os.path.join(dir, 'index.opf')
-            ncx_path = os.path.join(dir, 'index.ncx')
-            opf = OPFCreator(dir, mi)
-            # Add mastheadImage entry to <guide> section
-            mp = getattr(self, 'masthead_path', None)
-            if mp is not None and os.access(mp, os.R_OK):
-                from calibre.ebooks.metadata.opf2 import Guide
-                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
-                ref.type = 'masthead'
-                ref.title = 'Masthead Image'
-                opf.guide.append(ref)
-            manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
-            manifest.append(os.path.join(dir, 'index.html'))
-            manifest.append(os.path.join(dir, 'index.ncx'))
-            # Get cover
-            cpath = getattr(self, 'cover_path', None)
-            if cpath is None:
-                pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
-                if self.default_cover(pf):
-                    cpath = pf.name
-            if cpath is not None and os.access(cpath, os.R_OK):
-                opf.cover = cpath
-                manifest.append(cpath)
-            # Get masthead
-            mpath = getattr(self, 'masthead_path', None)
-            if mpath is not None and os.access(mpath, os.R_OK):
-                manifest.append(mpath)
+        # end of change 1
+        # change 2: __appname__ replaced by newspaper publisher
+        __appname__ = self.publisher
+        mi = MetaInformation(title, [__appname__])
+        mi.publisher = __appname__
+        mi.author_sort = __appname__
+        # change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
+        if __MakePeriodical__ == True:
+            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        else:
+            mi.publication_type = self.publication_type+':'+self.short_title()
+        #mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        # change 4: in the following, all the nowf() are changed to adjusted time
+        # This one doesn't matter
+        mi.timestamp = nowf()
+        # change 5: skip listing the articles
+        #article_titles, aseen = [], set()
+        #for f in feeds:
+        #    for a in f:
+        #        if a.title and a.title not in aseen:
+        #            aseen.add(a.title)
+        #            article_titles.append(force_unicode(a.title, 'utf-8'))
+        #mi.comments = self.description
+        #if not isinstance(mi.comments, unicode):
+        #    mi.comments = mi.comments.decode('utf-8', 'replace')
+        #mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+        #    '\n\n'.join(article_titles))
+        language = canonicalize_lang(self.language)
+        if language is not None:
+            mi.language = language
+        # This one affects the pub date shown in kindle title
+        #mi.pubdate = nowf()
+        # now appears to need the time field to be > 12.00noon as well
+        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+        opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')
+        opf = OPFCreator(dir, mi)
+        # Add mastheadImage entry to <guide> section
+        mp = getattr(self, 'masthead_path', None)
+        if mp is not None and os.access(mp, os.R_OK):
+            from calibre.ebooks.metadata.opf2 import Guide
+            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+            ref.type = 'masthead'
+            ref.title = 'Masthead Image'
+            opf.guide.append(ref)
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
+        manifest.append(os.path.join(dir, 'index.ncx'))
+        # Get cover
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is None:
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+            if self.default_cover(pf):
+                cpath = pf.name
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+            manifest.append(cpath)
+        # Get masthead
+        mpath = getattr(self, 'masthead_path', None)
+        if mpath is not None and os.access(mpath, os.R_OK):
+            manifest.append(mpath)
         opf.create_manifest_from_files_in(manifest)
         for mani in opf.manifest:
             if mani.path.endswith('.ncx'):
                 mani.id = 'ncx'
             if mani.path.endswith('mastheadImage.jpg'):
                 mani.id = 'masthead-image'
         entries = ['index.html']
         toc = TOC(base_path=dir)
         self.play_order_counter = 0
         self.play_order_map = {}
     def feed_index(num, parent):
         f = feeds[num]
@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
                         desc = None
                     else:
                         desc = self.description_limiter(desc)
+                    tt = a.toc_thumbnail if a.toc_thumbnail else None
                     entries.append('%sindex.html'%adir)
                     po = self.play_order_map.get(entries[-1], None)
                     if po is None:
                         self.play_order_counter += 1
                         po = self.play_order_counter
-                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
-                            play_order=po, author=auth, description=desc)
+                    parent.add_item('%sindex.html'%adir, None,
+                            a.title if a.title else _('Untitled Article'),
+                            play_order=po, author=auth,
+                            description=desc, toc_thumbnail=tt)
                     last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                     for sp in a.sub_pages:
                         prefix = os.path.commonprefix([opf_path, sp])
@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
                     prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                     templ = self.navbar.generate(True, num, j, len(f),
                                         not self.has_single_feed,
-                                        a.orig_url, self.publisher, prefix=prefix,
+                                        a.orig_url, __appname__, prefix=prefix,
                                         center=self.center_navbar)
                     elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                     body.insert(len(body.contents), elem)
@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
                 if not desc:
                     desc = None
                 feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                         f.title, play_order=po, description=desc, author=auth))
             else:
                 entries.append('feed_%d/index.html'%0)
@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
         with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
             opf.render(opf_file, ncx_file)

View File

@ -0,0 +1,15 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Mlody_technik(BasicNewsRecipe):
title = u'Mlody technik'
__author__ = 'fenuks'
description = u'Młody technik'
category = 'science'
language = 'pl'
cover_url='http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
#keep_only_tags=[dict(id='container')]
feeds = [(u'Artyku\u0142y', u'http://www.mt.com.pl/feed')]

View File

@ -1,9 +1,7 @@
-#!/usr/bin/env python
-
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
-moneynews.newsmax.com
+www.moneynews.com
 '''

 from calibre.web.feeds.news import BasicNewsRecipe

@ -12,40 +10,40 @@ class MoneyNews(BasicNewsRecipe):
     title = 'Moneynews.com'
     __author__ = 'Darko Miletic'
     description = 'Financial news worldwide'
-    publisher = 'moneynews.com'
+    publisher = 'Newsmax.com'
     language = 'en'
     category = 'news, finances, USA, business'
     oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    encoding = 'cp1252'
+    encoding = 'utf8'
+    extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}'

-    html2lrf_options = [
-                          '--comment', description
-                        , '--category', category
-                        , '--publisher', publisher
-                        , '--ignore-tables'
-                        ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+    conversion_options = {
+                          'comment'          : description
+                        , 'tags'             : category
+                        , 'publisher'        : publisher
+                        , 'language'         : language
+                        , 'linearize_tables' : True
+                        }

     feeds = [
-              (u'Street Talk'          , u'http://moneynews.newsmax.com/xml/streettalk.xml' )
-             ,(u'Finance News'         , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
-             ,(u'Economy'              , u'http://moneynews.newsmax.com/xml/economy.xml' )
-             ,(u'Companies'            , u'http://moneynews.newsmax.com/xml/companies.xml' )
-             ,(u'Markets'              , u'http://moneynews.newsmax.com/xml/Markets.xml' )
-             ,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml' )
+              (u'Street Talk'          , u'http://www.moneynews.com/rss/StreetTalk/8.xml' )
+             ,(u'Finance News'         , u'http://www.moneynews.com/rss/FinanceNews/4.xml' )
+             ,(u'Economy'              , u'http://www.moneynews.com/rss/Economy/2.xml' )
+             ,(u'Companies'            , u'http://www.moneynews.com/rss/Companies/6.xml' )
+             ,(u'Markets'              , u'http://www.moneynews.com/rss/Markets/7.xml' )
+             ,(u'Investing & Analysis' , u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml')
             ]

-    keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
+    keep_only_tags = [dict(name='div', attrs={'class':'copy'})]

     remove_tags = [
-                    dict(name='td'   , attrs={'id':'article_fontsize'})
-                   ,dict(name='table', attrs={'id':'toolbox'})
-                   ,dict(name='tr'   , attrs={'id':'noprint3'})
+                    dict(attrs={'class':['MsoNormal', 'MsoNoSpacing']}),
+                    dict(name=['object','link','embed','form','meta'])
                   ]
+
+    def print_version(self, url):
+        nodeid = url.rpartition('/')[2]
+        return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid
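The new print_version simply lifts the trailing node id from the article URL (id hypothetical):

    # 'http://www.moneynews.com/StreetTalk/some-story/2011/12/23/id/421234'
    #     -> 'http://www.moneynews.com/PrintTemplate?nodeid=421234'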

View File

@ -7,6 +7,7 @@ class naczytniki(BasicNewsRecipe):
     language = 'pl'
     description ='everything about e-readers'
     category='readers'
+    no_stylesheets=True
     oldest_article = 7
     max_articles_per_feed = 100
     remove_tags_after= dict(name='div', attrs={'class':'sociable'})

recipes/nol.recipe (new file, 54 lines)
View File

@ -0,0 +1,54 @@
################################################################################
#Description: http://nol.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.18. - V1.1
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class NOL(BasicNewsRecipe):
title = u'NOL'
__author__ = 'Bigpapa'
oldest_article = 5
max_articles_per_feed = 5 # Maximum number of articles kept per feed in the generated e-book.
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
language = 'hu'
publication_type = 'newsportal'
conversion_options ={
'linearize_tables' : True,
}
keep_only_tags = [
dict(name='table', attrs={'class':['article-box']})
]
remove_tags = [
dict(name='div', attrs={'class':['h','ad-container-outer','tags noborder','ad-container-inner','image-container-lead','tags','related-container']}),
dict(name='h4'),
dict(name='tfoot'),
dict(name='td', attrs={'class':['foot']}),
dict(name='span', attrs={'class':['image-container-caption']}),
]
feeds = [
# (u'V\xe1logat\xe1s', 'http://nol.hu/feed/valogatas.rss'),
(u'Belf\xf6ld', 'http://nol.hu/feed/belfold.rss'),
(u'K\xfclf\xf6ld', 'http://nol.hu/feed/kulfold.rss'),
(u'Gazdas\xe1g', 'http://nol.hu/feed/gazdasag.rss'),
(u'V\xe9lem\xe9ny', 'http://nol.hu/feed/velemeny.rss'),
(u'Kult\xfara', 'http://nol.hu/feed/kult.rss'),
(u'Tud/Tech', 'http://nol.hu/feed/tud-tech.rss'),
(u'Sport', 'http://nol.hu/feed/sport.rss'),
(u'Noller', 'http://nol.hu/feed/noller.rss'),
(u'Mozaik', 'http://nol.hu/feed/mozaik.rss'),
(u'Utaz\xe1s', 'http://nol.hu/feed/utazas.rss'),
(u'Aut\xf3', 'http://nol.hu/feed/auto.rss'),
(u'Voks', 'http://nol.hu/feed/voks.rss'),
]

View File

@ -1,20 +1,21 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Nowa_Fantastyka(BasicNewsRecipe): class Nowa_Fantastyka(BasicNewsRecipe):
title = u'Nowa Fantastyka' title = u'Nowa Fantastyka'
oldest_article = 7 oldest_article = 7
__author__ = 'fenuks' __author__ = 'fenuks'
language = 'pl' language = 'pl'
encoding='latin2'
description ='site for fantasy readers' description ='site for fantasy readers'
category='fantasy' category='fantasy'
max_articles_per_feed = 100 max_articles_per_feed = 100
INDEX='http://www.fantastyka.pl/' INDEX='http://www.fantastyka.pl/'
no_stylesheets=True
needs_subscription = 'optional'
remove_tags_before=dict(attrs={'class':'belka1-tlo-md'}) remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
#remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'}) #remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'}) remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
remove_tags=[dict(attrs={'class':'avatar2'})] remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
feeds = []
def find_articles(self, url): def find_articles(self, url):
articles = [] articles = []
@ -45,3 +46,13 @@ class Nowa_Fantastyka(BasicNewsRecipe):
         cover=soup.find(name='img', attrs={'class':'okladka'})
         self.cover_url=self.INDEX+ cover['src']
         return getattr(self, 'cover_url', self.cover_url)
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('http://www.fantastyka.pl/')
+            br.select_form(nr=0)
+            br['login'] = self.username
+            br['pass'] = self.password
+            br.submit()
+        return br

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
__license__ = 'GPL v3'
'''
calibre recipe for prospectmagazine.co.uk (subscription)
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class ProspectMagUK(BasicNewsRecipe):
title = u'Prospect Magazine'
description = 'A general-interest publication offering analysis and commentary about politics, news and business.'
__author__ = 'barty, duluoz'
timefmt = ' [%d %B %Y]'
no_stylesheets = True
publication_type = 'magazine'
masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
category = 'news, UK'
language = 'en_GB'
max_articles_per_feed = 100
auto_cleanup = True
needs_subscription = True
auto_cleanup_keep = '//div[@class="lead_image"]'
remove_tags = [{'class':['shareinpost','postutils','postinfo']}]
INDEX = 'http://www.prospectmagazine.co.uk/current-issue'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.prospectmagazine.co.uk/wp-login.php')
br.select_form(name='loginform')
br['log'] = self.username
br['pwd'] = self.password
br.submit()
return br
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
#div = soup.find('h1',text=re.compile(r'Issue \d+'))
#fname = self.tag_to_string( div) if div is not None else 'Current Issue'
div = soup.find('div', id='cover_image')
if div is not None:
img = div.find('img', src=True)
if img is not None:
src = img['src']
if src.startswith('/'):
src = 'http://www.prospectmagazine.co.uk' + src
self.cover_url = src
feeds = []
# loop through sections
for sect in soup.findAll('div',attrs={'class':'sectionheading'}):
fname = self.tag_to_string( sect).replace('>','').strip()
self.log('Found section', fname)
articles = []
# note: can't just find siblings with class='post' because that will also
# grab all the articles belonging to the sections that follow.
for item in sect.findNextSiblings('div',attrs={'class':True}):
if not 'post' in item['class']: break
a = item.find('a', href=True)
if a is None: continue
url = a['href']
title = self.tag_to_string(a)
p = item.find('p')
desc = self.tag_to_string( p) if p is not None else ''
art = {'title':title, 'description':desc,'date':' ', 'url':url}
p = item.find(attrs={'class':re.compile('author')})
self.log('\tFound article:', title, '::', url)
if p is not None:
art['author'] = self.tag_to_string( p).strip()
articles.append(art)
feeds.append((fname, articles))
return feeds

View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class spn(BasicNewsRecipe):
    title = u'Salonica Press News'
    __author__ = "SteliosGero"
    oldest_article = 3
    max_articles_per_feed = 100
    auto_cleanup = True
    category = 'news, GR'
    language = 'el'
    feeds = [
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&type=rss'),
        (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&type=rss'),
        (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&type=rss'),
        (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&type=rss'),
        (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&type=rss'),
        (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&type=rss'),
        (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&type=rss'),
        (u'Media', u'http://www.spnews.gr/media-news?format=feed&type=rss'),
        (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&type=rss'),
        (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&type=rss'),
        (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&type=rss'),
        (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&type=rss'),
        (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&type=rss'),
        (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&type=rss'),
        (u'Alert!', u'http://spnews.gr/alert?format=feed&type=rss'),
    ]
def print_version(self, url):
return url+'?tmpl=component&print=1&layout=default&page='
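Illustration (article slug hypothetical): the Joomla print template is selected purely through the query string appended above:

    # 'http://www.spnews.gr/politiki/some-article'
    #     -> 'http://www.spnews.gr/politiki/some-article?tmpl=component&print=1&layout=default&page='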

View File

@ -8,8 +8,8 @@ class SpidersWeb(BasicNewsRecipe):
     cover_url = 'http://www.spidersweb.pl/wp-content/themes/spiderweb/img/Logo.jpg'
     category = 'IT, WEB'
     language = 'pl'
+    no_stylesheers=True
     max_articles_per_feed = 100
-    remove_tags_before=dict(name="h1", attrs={'class':'Title'})
-    remove_tags_after=dict(name="div", attrs={'class':'Text'})
-    remove_tags=[dict(name='div', attrs={'class':['Tags', 'CommentCount FloatL', 'Show FloatL']})]
+    keep_only_tags=[dict(id='Post')]
+    remove_tags=[dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']})]
     feeds = [(u'Wpisy', u'http://www.spidersweb.pl/feed')]

View File

@ -6,92 +6,49 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Fetch sueddeutsche.de
 '''
 from calibre.web.feeds.news import BasicNewsRecipe

 class Sueddeutsche(BasicNewsRecipe):

     title = u'sueddeutsche.de'
     description = 'News from Germany'
-    __author__ = 'Oliver Niesner and Armin Geller' #AGe 2011-12-16
+    __author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2011-12-16
     use_embedded_content = False
     timefmt = ' [%d %b %Y]'
     oldest_article = 7
     max_articles_per_feed = 50
     no_stylesheets = True
     language = 'de'
-    auto_cleanup = True
     encoding = 'utf-8'
     remove_javascript = True
-    cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1236175.1323967473!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
-    # 2011-12-16 AGe
-    # remove_tags = [ dict(name='link'), dict(name='iframe'),
-    #                 dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
-    #                 "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
-    #
-    #                 dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
-    #                 "pages closed","basebox right narrow","headslot galleried"]}),
-    #
-    #                 dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
-    #                 "item","videoBigButton","articlefooter full-column",
-    #                 "bildbanderolle full-column","footerCopy padleft5"]}),
-    #
-    #                 dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
-    #                 dict(name='div', attrs={'style':["position:relative;"]}),
-    #                 dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
-    #                 dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
-    #                 dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
-    #                 dict(name='td', attrs={'class':["artikelDruckenRight"]}),
-    #                 dict(name='p', text = "ANZEIGE")
-    #               ]
-    # remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]
-    #
-    extra_css = '''
-              h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
-              a{font-family:Arial,Helvetica,sans-serif; font-style:italic;}
-              .dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
-              h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
-              .artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
-              body{font-family:Arial,Helvetica,sans-serif; }
-              .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
-    #
+    auto_cleanup = True
+    cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1237395.1324054345!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
     feeds = [
-             # (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'), # AGe 2011-11-13
-             # (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'), #AGe 2011-12-16 deactivated
-             # (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
-             # (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
-             # (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
-             # (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
-             # (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
-             (u'Politik', u'http://www.sueddeutsche.de/app/service/rss/ressort/politik/rss.xml'),
-             (u'Wirtschaft', u'http://www.sueddeutsche.de/app/service/rss/ressort/wirtschaft/rss.xml'),
-             (u'Geld', u'http://www.sueddeutsche.de/app/service/rss/ressort/finanzen/rss.xml'),
-             (u'Kultur', u'http://www.sueddeutsche.de/app/service/rss/ressort/kultur/rss.xml'),
-             (u'Sport', u'http://www.sueddeutsche.de/app/service/rss/ressort/sport/rss.xml'),
-             (u'Leben', u'http://www.sueddeutsche.de/app/service/rss/ressort/leben/rss.xml'),
-             (u'Karriere', u'http://www.sueddeutsche.de/app/service/rss/ressort/karriere/rss.xml'),
-             (u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
-             (u'Bayern', u'http://www.sueddeutsche.de/app/service/rss/ressort/bayern/rss.xml'),
-             (u'Medien', u'http://www.sueddeutsche.de/app/service/rss/ressort/medien/rss.xml'),
-             (u'Digital', u'http://www.sueddeutsche.de/app/service/rss/ressort/computerwissen/rss.xml'),
-             (u'Auto', u'http://www.sueddeutsche.de/app/service/rss/ressort/autoreise/rss.xml'),
-             (u'Wissen', u'http://www.sueddeutsche.de/app/service/rss/ressort/wissen/rss.xml'),
-             (u'Panorama', u'http://www.sueddeutsche.de/app/service/rss/ressort/panorama/rss.xml'),
-             (u'Reise', u'http://www.sueddeutsche.de/app/service/rss/ressort/reise/rss.xml'),
+             (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
+             (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
+             (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
+             (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
+             (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
+             (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
+             (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
+             (u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
+             (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
+             (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
+             (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
+             (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
+             (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
+             (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
+             (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
+             (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
+             (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only
+             (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
+             (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
+             (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
             ]
-    # def print_version(self, url):          #AGe 2011-12-16 deactivated
-    #     main, sep, id = url.rpartition('/')    #AGe 2011-12-16 deactivated
-    #     return main + '/2.220/' + id       #AGe 2011-12-16 deactivated
+    # AGe 2011-12-16 Problem of Handling redirections solved by a solution of Recipes-Re-usable code from kiklop74.
+    # Feed is: http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss
+    # Article download source is: http://sz.de/1.1237295 (Ski Alpin: Der Erfolg kommt, der Trainer geht)
+    # Article source is: http://www.sueddeutsche.de/sport/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
+    # Article printversion is: http://www.sueddeutsche.de/sport/2.220/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
+    def print_version(self, url):
+        n_url=self.browser.open_novisit(url).geturl()
+        main, sep, id = n_url.rpartition('/')
+        return main + '/2.220/' + id

View File

@ -1,4 +1,4 @@
-import re
+import re, urllib
 from calibre.web.feeds.news import BasicNewsRecipe

 class TimesOfIndia(BasicNewsRecipe):
@ -17,7 +17,9 @@ class TimesOfIndia(BasicNewsRecipe):
         ]
     remove_tags = [
         {'class':re.compile('tabsintbgshow|prvnxtbg')},
-        {'id':['fbrecommend', 'relmaindiv']}
+        {'id':['fbrecommend', 'relmaindiv', 'shretxt', 'fbrecos', 'twtdiv',
+            'gpls', 'auim']},
+        {'class':['twitter-share-button', 'cmtmn']},
         ]

     feeds = [
@ -46,25 +48,27 @@ class TimesOfIndia(BasicNewsRecipe):
         ]

     def get_article_url(self, article):
-        # Times of India sometimes serves an ad page instead of the article,
-        # this code detects and circumvents that
-        url = BasicNewsRecipe.get_article_url(self, article)
-        if '/0Ltimesofindia' in url:
-            url = url.partition('/0L')[-1]
-            url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
-                    '/').replace('0E', '-')
-            url = 'http://' + url.rpartition('/')[0]
-            match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
-            if match is not None:
-                num = match.group(1)
-                num = re.sub(r'[^0-9]', '', num)
-                return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
-                        num)
-            else:
-                cms = re.search(r'/(\d+)\.cms', url)
-                if cms is not None:
-                    return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
-                            cms.group(1))
-        return url
+        try:
+            s = article.summary
+            return urllib.unquote(
+                    re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
+        except:
+            pass
+        link = article.get('link', None)
+        if link and link.split('/')[-1]=="story01.htm":
+            link=link.split('/')[-2]
+            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                    '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
+            for k, v in encoding.iteritems():
+                link = link.replace(k, v)
+        return link
+
+    def print_version(self, url):
+        return url + '?prtpage=1'
+
+    def preprocess_html(self, soup, *args):
+        byl = soup.find(attrs={'class':'byline'})
+        if byl is not None:
+            for l in byl.findAll('label'):
+                l.extract()
+        return soup
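A worked example of the new link decoding (encoded segment hypothetical): for feed items whose link ends in story01.htm, the second-to-last path segment carries the encoded article address, which the table above expands:

    # '.../0Lwww0Btimesofindia0N0Carticleshow0C1234560Bcms/story01.htm'
    #     -> 'http://www.timesofindia.com/articleshow/123456.cms'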

recipes/tuttojove.recipe (new file, 17 lines)
View File

@ -0,0 +1,17 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian website on Juventus F.C. - v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305984536(BasicNewsRecipe):
title = u'tuttojuve'
description = 'Juventus'
language = 'it'
__author__ = 'faber1971'
oldest_article = 1
max_articles_per_feed = 100
feeds = [(u'notizie', u'http://feeds.tuttojuve.com/rss/'), (u'da vinovo', u'http://feeds.tuttojuve.com/rss/?c=10'), (u'primo piano', u'http://feeds.tuttojuve.com/rss/?c=16'), (u'editoriale', u'http://feeds.tuttojuve.com/rss/?c=3'), (u'il punto', u'http://feeds.tuttojuve.com/rss/?c=8'), (u'pagelle', u'http://feeds.tuttojuve.com/rss/?c=9'), (u'avversario', u'http://feeds.tuttojuve.com/rss/?c=11')]
def print_version(self, url):
return self.browser.open_novisit(url).geturl()
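Note (illustration only): returning self.browser.open_novisit(url).geturl() makes calibre fetch the redirect target rather than the feed proxy page, the same redirect-resolving trick the Sueddeutsche recipe above relies on:

    # feed link 'http://feeds.tuttojuve.com/~r/tuttojuve/~3/abc/article.html' (hypothetical)
    #     -> resolved 'http://www.tuttojuve.com/...'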

View File

@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n" "Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n" "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-11-26 08:48+0000\n" "PO-Revision-Date: 2011-12-17 09:29+0000\n"
"Last-Translator: Jellby <Unknown>\n" "Last-Translator: Jellby <Unknown>\n"
"Language-Team: Spanish <es@li.org>\n" "Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-27 05:24+0000\n" "X-Launchpad-Export-Date: 2011-12-18 04:37+0000\n"
"X-Generator: Launchpad (build 14381)\n" "X-Generator: Launchpad (build 14525)\n"
#. name for aaa #. name for aaa
msgid "Ghotuo" msgid "Ghotuo"
@ -4963,7 +4963,7 @@ msgstr "Catawba"
#. name for chd #. name for chd
msgid "Chontal; Highland Oaxaca" msgid "Chontal; Highland Oaxaca"
msgstr "" msgstr "Chontal oaxaqueño de las tierras altas"
#. name for che #. name for che
msgid "Chechen" msgid "Chechen"
@ -4971,7 +4971,7 @@ msgstr "Checheno"
#. name for chf #. name for chf
msgid "Chontal; Tabasco" msgid "Chontal; Tabasco"
msgstr "" msgstr "Chontal de tabasco"
#. name for chg #. name for chg
msgid "Chagatai" msgid "Chagatai"
@ -4983,7 +4983,7 @@ msgstr "Chinook"
#. name for chj #. name for chj
msgid "Chinantec; Ojitlán" msgid "Chinantec; Ojitlán"
msgstr "" msgstr "Chinanteco de Ojitlán"
#. name for chk #. name for chk
msgid "Chuukese" msgid "Chuukese"
@ -5011,7 +5011,7 @@ msgstr "Chipewyan"
#. name for chq #. name for chq
msgid "Chinantec; Quiotepec" msgid "Chinantec; Quiotepec"
msgstr "" msgstr "Chinanteco de Quiotepec"
#. name for chr #. name for chr
msgid "Cherokee" msgid "Cherokee"
@ -5043,7 +5043,7 @@ msgstr "Cheyenne"
#. name for chz #. name for chz
msgid "Chinantec; Ozumacín" msgid "Chinantec; Ozumacín"
msgstr "" msgstr "Chinanteco de Ozumacín"
#. name for cia #. name for cia
msgid "Cia-Cia" msgid "Cia-Cia"
@ -5051,7 +5051,7 @@ msgstr "Cia-cia"
#. name for cib #. name for cib
msgid "Gbe; Ci" msgid "Gbe; Ci"
msgstr "" msgstr "Cigbe"
#. name for cic #. name for cic
msgid "Chickasaw" msgid "Chickasaw"
@ -5215,7 +5215,7 @@ msgstr "Arameo moderno caldeo"
#. name for cle #. name for cle
msgid "Chinantec; Lealao" msgid "Chinantec; Lealao"
msgstr "" msgstr "Chinanteco de Lealao"
#. name for clh #. name for clh
msgid "Chilisso" msgid "Chilisso"
@ -5239,7 +5239,7 @@ msgstr "Clallam"
#. name for clo #. name for clo
msgid "Chontal; Lowland Oaxaca" msgid "Chontal; Lowland Oaxaca"
msgstr "" msgstr "Chontal oaxaqueño de las tieras bajas"
#. name for clu #. name for clu
msgid "Caluyanun" msgid "Caluyanun"
@ -5251,7 +5251,7 @@ msgstr "Chulym"
#. name for cly #. name for cly
msgid "Chatino; Eastern Highland" msgid "Chatino; Eastern Highland"
msgstr "" msgstr "Chatino Lachao-Yolotepec"
#. name for cma #. name for cma
msgid "Maa" msgid "Maa"
@ -5327,7 +5327,7 @@ msgstr ""
#. name for cnl #. name for cnl
msgid "Chinantec; Lalana" msgid "Chinantec; Lalana"
msgstr "" msgstr "Chinanteco de Lalana"
#. name for cno #. name for cno
msgid "Con" msgid "Con"
@ -5339,7 +5339,7 @@ msgstr "Asmat central"
#. name for cnt #. name for cnt
msgid "Chinantec; Tepetotutla" msgid "Chinantec; Tepetotutla"
msgstr "" msgstr "Chinanteco de Tepetotutla"
#. name for cnu #. name for cnu
msgid "Chenoua" msgid "Chenoua"
@ -5355,7 +5355,7 @@ msgstr "Córnico medio"
#. name for coa #. name for coa
msgid "Malay; Cocos Islands" msgid "Malay; Cocos Islands"
msgstr "" msgstr "Malayo de las Islas Cocos"
#. name for cob #. name for cob
msgid "Chicomuceltec" msgid "Chicomuceltec"
@ -5391,7 +5391,7 @@ msgstr "Cochimi"
#. name for cok #. name for cok
msgid "Cora; Santa Teresa" msgid "Cora; Santa Teresa"
msgstr "" msgstr "Cora de Santa Teresa"
#. name for col #. name for col
msgid "Columbia-Wenatchi" msgid "Columbia-Wenatchi"
@ -5455,7 +5455,7 @@ msgstr "Chocho"
#. name for cpa #. name for cpa
msgid "Chinantec; Palantla" msgid "Chinantec; Palantla"
msgstr "" msgstr "Chinanteco de Palantla"
#. name for cpb #. name for cpb
msgid "Ashéninka; Ucayali-Yurúa" msgid "Ashéninka; Ucayali-Yurúa"
@ -5599,7 +5599,7 @@ msgstr "Cruzeño"
#. name for csa #. name for csa
msgid "Chinantec; Chiltepec" msgid "Chinantec; Chiltepec"
msgstr "" msgstr "Chinanteco de Chiltepec"
#. name for csb #. name for csb
msgid "Kashubian" msgid "Kashubian"
@ -5651,7 +5651,7 @@ msgstr "Lengua de signos colombiana"
#. name for cso #. name for cso
msgid "Chinantec; Sochiapan" msgid "Chinantec; Sochiapan"
msgstr "" msgstr "Chinanteco de Sochiapan"
#. name for csq #. name for csq
msgid "Croatia Sign Language" msgid "Croatia Sign Language"
@ -5683,7 +5683,7 @@ msgstr "Coos"
#. name for cta #. name for cta
msgid "Chatino; Tataltepec" msgid "Chatino; Tataltepec"
msgstr "" msgstr "Chatino de Tataltepec"
#. name for ctc #. name for ctc
msgid "Chetco" msgid "Chetco"
@ -5695,7 +5695,7 @@ msgstr ""
#. name for cte #. name for cte
msgid "Chinantec; Tepinapa" msgid "Chinantec; Tepinapa"
msgstr "" msgstr "Chinanteco de Tepinapa"
#. name for ctg #. name for ctg
msgid "Chittagonian" msgid "Chittagonian"
@ -5703,7 +5703,7 @@ msgstr "Chitagoniano"
#. name for ctl #. name for ctl
msgid "Chinantec; Tlacoatzintepec" msgid "Chinantec; Tlacoatzintepec"
msgstr "" msgstr "Chinanteco de Tlacoatzintepec"
#. name for ctm #. name for ctm
msgid "Chitimacha" msgid "Chitimacha"
@ -5719,7 +5719,7 @@ msgstr "Emberá-catío"
#. name for ctp #. name for ctp
msgid "Chatino; Western Highland" msgid "Chatino; Western Highland"
msgstr "" msgstr "Chatino"
#. name for cts #. name for cts
msgid "Bicolano; Northern Catanduanes" msgid "Bicolano; Northern Catanduanes"
@ -5735,7 +5735,7 @@ msgstr "Chol"
#. name for ctz #. name for ctz
msgid "Chatino; Zacatepec" msgid "Chatino; Zacatepec"
msgstr "" msgstr "Chatino de Zacatepec"
#. name for cua #. name for cua
msgid "Cua" msgid "Cua"
@ -5747,7 +5747,7 @@ msgstr "Cubeo"
#. name for cuc #. name for cuc
msgid "Chinantec; Usila" msgid "Chinantec; Usila"
msgstr "" msgstr "Chinanteco de Usila"
#. name for cug #. name for cug
msgid "Cung" msgid "Cung"
@ -5819,7 +5819,7 @@ msgstr "Chug"
#. name for cvn #. name for cvn
msgid "Chinantec; Valle Nacional" msgid "Chinantec; Valle Nacional"
msgstr "" msgstr "Chinanteco de Valle Nacional"
#. name for cwa #. name for cwa
msgid "Kabwa" msgid "Kabwa"
@ -5847,7 +5847,7 @@ msgstr "Kuwaataay"
#. name for cya #. name for cya
msgid "Chatino; Nopala" msgid "Chatino; Nopala"
msgstr "" msgstr "Chatino de Nopala"
#. name for cyb #. name for cyb
msgid "Cayubaba" msgid "Cayubaba"
@ -5871,7 +5871,7 @@ msgstr "Knaanic"
#. name for czn #. name for czn
msgid "Chatino; Zenzontepec" msgid "Chatino; Zenzontepec"
msgstr "" msgstr "Chatino de Zenzontepec"
#. name for czo #. name for czo
msgid "Chinese; Min Zhong" msgid "Chinese; Min Zhong"
@ -10603,7 +10603,7 @@ msgstr "Javanés"
#. name for jax #. name for jax
msgid "Malay; Jambi" msgid "Malay; Jambi"
msgstr "" msgstr "Malayo de Jambi"
#. name for jay #. name for jay
msgid "Yan-nhangu" msgid "Yan-nhangu"
@ -14743,7 +14743,7 @@ msgstr "Lorung meridional"
#. name for lrt #. name for lrt
msgid "Malay; Larantuka" msgid "Malay; Larantuka"
msgstr "" msgstr "Malayo de Larantuka"
#. name for lrv #. name for lrv
msgid "Larevat" msgid "Larevat"
@ -15099,7 +15099,7 @@ msgstr ""
#. name for max #. name for max
msgid "Malay; North Moluccan" msgid "Malay; North Moluccan"
msgstr "" msgstr "Malayo de las Molucas septentrional"
#. name for maz #. name for maz
msgid "Mazahua; Central" msgid "Mazahua; Central"
@ -15127,7 +15127,7 @@ msgstr ""
#. name for mbf #. name for mbf
msgid "Malay; Baba" msgid "Malay; Baba"
msgstr "" msgstr "Malayo baba"
#. name for mbh #. name for mbh
msgid "Mangseng" msgid "Mangseng"
@ -15467,7 +15467,7 @@ msgstr "Mende (Sierra Leona)"
#. name for meo #. name for meo
msgid "Malay; Kedah" msgid "Malay; Kedah"
msgstr "" msgstr "Malayo de Kedah"
#. name for mep #. name for mep
msgid "Miriwung" msgid "Miriwung"
@ -15511,7 +15511,7 @@ msgstr ""
#. name for mfa #. name for mfa
msgid "Malay; Pattani" msgid "Malay; Pattani"
msgstr "" msgstr "Malayo de Pattani"
#. name for mfb #. name for mfb
msgid "Bangka" msgid "Bangka"
@ -15571,7 +15571,7 @@ msgstr ""
#. name for mfp #. name for mfp
msgid "Malay; Makassar" msgid "Malay; Makassar"
msgstr "" msgstr "Malayo de Macasar"
#. name for mfq #. name for mfq
msgid "Moba" msgid "Moba"
@ -16059,7 +16059,7 @@ msgstr ""
#. name for mkn #. name for mkn
msgid "Malay; Kupang" msgid "Malay; Kupang"
msgstr "" msgstr "Malayo de Kupang"
#. name for mko #. name for mko
msgid "Mingang Doso" msgid "Mingang Doso"
@ -16207,7 +16207,7 @@ msgstr ""
#. name for mlz #. name for mlz
msgid "Malaynon" msgid "Malaynon"
msgstr "" msgstr "Malaynón"
#. name for mma #. name for mma
msgid "Mama" msgid "Mama"
@ -16623,7 +16623,7 @@ msgstr ""
#. name for mqg #. name for mqg
msgid "Malay; Kota Bangun Kutai" msgid "Malay; Kota Bangun Kutai"
msgstr "" msgstr "Malayo kutai de Kota Bangun"
#. name for mqh #. name for mqh
msgid "Mixtec; Tlazoyaltepec" msgid "Mixtec; Tlazoyaltepec"
@ -16839,7 +16839,7 @@ msgstr "Malgache masikoro"
#. name for msi #. name for msi
msgid "Malay; Sabah" msgid "Malay; Sabah"
msgstr "" msgstr "Malayo de Sabah"
#. name for msj #. name for msj
msgid "Ma (Democratic Republic of Congo)" msgid "Ma (Democratic Republic of Congo)"
@ -22607,7 +22607,7 @@ msgstr ""
#. name for sci #. name for sci
msgid "Creole Malay; Sri Lankan" msgid "Creole Malay; Sri Lankan"
msgstr "" msgstr "Malo criollo de Sri Lanka"
#. name for sck #. name for sck
msgid "Sadri" msgid "Sadri"
@ -27187,7 +27187,7 @@ msgstr ""
#. name for vkt #. name for vkt
msgid "Malay; Tenggarong Kutai" msgid "Malay; Tenggarong Kutai"
msgstr "" msgstr "Malayo kutai de Tenggarong"
#. name for vku #. name for vku
msgid "Kurrama" msgid "Kurrama"
@ -28395,7 +28395,7 @@ msgstr "Edomita"
#. name for xdy #. name for xdy
msgid "Dayak; Malayic" msgid "Dayak; Malayic"
msgstr "" msgstr "Dayak malayo"
#. name for xeb #. name for xeb
msgid "Eblan" msgid "Eblan"
@ -28727,7 +28727,7 @@ msgstr "Lengua de signos malasia"
#. name for xmm #. name for xmm
msgid "Malay; Manado" msgid "Malay; Manado"
msgstr "" msgstr "Malayo de Manado"
#. name for xmn #. name for xmn
msgid "Persian; Manichaean Middle" msgid "Persian; Manichaean Middle"
@ -233,7 +233,7 @@ class GetTranslations(Translations): # {{{
if self.modified_translations: if self.modified_translations:
subprocess.check_call(['bzr', 'commit', '-m', subprocess.check_call(['bzr', 'commit', '-m',
'IGN:Updated translations', self.PATH]) 'IGN:Updated translations'])
else: else:
print('No updated translations available') print('No updated translations available')
@ -527,7 +527,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252',
ent = match.group(1) ent = match.group(1)
if ent in exceptions: if ent in exceptions:
return '&'+ent+';' return '&'+ent+';'
if ent == 'apos': if ent in {'apos', 'squot'}: # squot is generated by some broken CMS software
return check("'") return check("'")
if ent == 'hellips': if ent == 'hellips':
ent = 'hellip' ent = 'hellip'
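For context: entity_to_unicode is used as a substitution callback over raw HTML. A minimal standalone sketch of the pattern; the names below are illustrative and only two entities are handled (the real signature, per the hunk header, also takes exceptions, an encoding and a check() wrapper, all elided here):

import re
from functools import partial

def entity_to_unicode(match, exceptions=()):
    # Stripped-down sketch of the function patched above
    ent = match.group(1)
    if ent in exceptions:
        return '&%s;' % ent
    if ent in {'apos', 'squot'}:  # squot is generated by some broken CMS software
        return "'"
    return match.group(0)  # leave anything unrecognized untouched

html = 'It&squot;s &amp; it&apos;s'
print(re.sub(r'&(\w+);', partial(entity_to_unicode, exceptions=('amp',)), html))
# -> It's &amp; it's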
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 31) numeric_version = (0, 8, 32)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@ -48,7 +48,8 @@ class ANDROID(USBMS):
0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216], 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216], 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216], 0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
0x70c6 : [0x226] 0x70c6 : [0x226],
0x4316 : [0x216],
}, },
# Freescale # Freescale
0x15a2 : { 0x15a2 : {
@ -87,6 +88,7 @@ class ANDROID(USBMS):
0x6877 : [0x0400], 0x6877 : [0x0400],
0x689e : [0x0400], 0x689e : [0x0400],
0xdeed : [0x0222], 0xdeed : [0x0222],
0x1234 : [0x0400],
}, },
# Viewsonic/Vizio # Viewsonic/Vizio
@ -170,13 +172,14 @@ class ANDROID(USBMS):
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A', 'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI', 'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107'] 'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL', '__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853', 'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F'] 'A1-07___C0541A4F', 'XT912']
OSX_MAIN_MEM = 'Android Device Main Memory' OSX_MAIN_MEM = 'Android Device Main Memory'
@ -170,8 +170,8 @@ class ODYSSEY(N516):
description = _('Communicate with the Cybook Odyssey eBook reader.') description = _('Communicate with the Cybook Odyssey eBook reader.')
BCD = [0x316] BCD = [0x316]
VENDOR_NAME = 'LINUX' VENDOR_NAME = ['LINUX', 'BOOKEEN']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET' WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['FILE-STOR_GADGET', 'FLASH_DISK']
FORMATS = ['epub', 'fb2', 'html', 'pdf', 'txt'] FORMATS = ['epub', 'fb2', 'html', 'pdf', 'txt']
@ -266,12 +266,14 @@ class PRST1(USBMS):
collections = booklist.get_collections(collections_attributes) collections = booklist.get_collections(collections_attributes)
with closing(sqlite.connect(dbpath)) as connection: with closing(sqlite.connect(dbpath)) as connection:
self.update_device_books(connection, booklist, source_id, plugboard) self.update_device_books(connection, booklist, source_id,
plugboard, dbpath)
self.update_device_collections(connection, booklist, collections, source_id) self.update_device_collections(connection, booklist, collections, source_id)
debug_print('PRST1: finished update_device_database') debug_print('PRST1: finished update_device_database')
def update_device_books(self, connection, booklist, source_id, plugboard): def update_device_books(self, connection, booklist, source_id, plugboard,
dbpath):
opts = self.settings() opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS] upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS] refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
@ -284,12 +286,12 @@ class PRST1(USBMS):
query = 'SELECT file_path, _id FROM books' query = 'SELECT file_path, _id FROM books'
cursor.execute(query) cursor.execute(query)
except DatabaseError: except DatabaseError:
raise DeviceError('The SONY database is corrupted. ' raise DeviceError(('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect ' ' Delete the file %s on your reader and then disconnect '
' reconnect it. If you are using an SD card, you ' ' and reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that ' ' should delete the file on the card as well. Note that '
' deleting this file may cause your reader to forget ' ' deleting this file will cause your reader to forget '
' any notes/highlights, etc.') ' any notes/highlights, etc.')%dbpath)
db_books = {} db_books = {}
for i, row in enumerate(cursor): for i, row in enumerate(cursor):
@ -276,11 +276,11 @@ OptionRecommendation(name='duplicate_links_in_toc',
OptionRecommendation(name='chapter', OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and " recommended_value="//*[((name()='h1' or name()='h2') and "
r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class " r"re:test(., '\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW, "= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default ' help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words ' 'is to consider <h1> or <h2> tags that contain the words '
'"chapter","book","section" or "part" as chapter titles as ' '"chapter","book","section", "prologue", "epilogue", or "part" as chapter titles as '
'well as any tags that have class="chapter". The expression ' 'well as any tags that have class="chapter". The expression '
'used must evaluate to a list of elements. To disable chapter ' 'used must evaluate to a list of elements. To disable chapter '
'detection, use the expression "/". See the XPath Tutorial ' 'detection, use the expression "/". See the XPath Tutorial '
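The practical effect of the tightened regular expression is easiest to see outside the XPath, as a plain re check (patterns verbatim from the hunk above):

import re

old = re.compile(r'chapter|book|section|part|prologue|epilogue\s+', re.I)
new = re.compile(r"\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))", re.I)

for heading in ('Chapter 1', 'Epilogue', 'A particularly long day'):
    print(heading, bool(old.search(heading)), bool(new.search(heading)))
# Chapter 1               -> old: True   new: True
# Epilogue                -> old: False  new: True   (old required trailing whitespace)
# A particularly long day -> old: True   new: False  (old matched "part" inside a word)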
@ -35,7 +35,7 @@ def initialize_container(path_to_container, opf_name='metadata.opf',
'''.format(opf_name, extra_entries=rootfiles).encode('utf-8') '''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
zf = ZipFile(path_to_container, 'w') zf = ZipFile(path_to_container, 'w')
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED) zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
zf.writestr('META-INF/', '', 0700) zf.writestr('META-INF/', '', 0755)
zf.writestr('META-INF/container.xml', CONTAINER) zf.writestr('META-INF/container.xml', CONTAINER)
for path, _, data in extra_entries: for path, _, data in extra_entries:
zf.writestr(path, data) zf.writestr(path, data)
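Note that the ZipFile used here is calibre's own wrapper, whose writestr takes permission bits as its third argument (hence the 0700 -> 0755 change above). With only the stdlib zipfile module, the same container layout looks roughly like this sketch; the EPUB spec requires mimetype to be the first entry and stored uncompressed:

import zipfile

CONTAINER = ('<?xml version="1.0"?><container version="1.0" '
             'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
             '<rootfiles><rootfile full-path="metadata.opf" '
             'media-type="application/oebps-package+xml"/></rootfiles></container>')

with zipfile.ZipFile('book.epub', 'w') as zf:
    zf.writestr('mimetype', 'application/epub+zip',
                compress_type=zipfile.ZIP_STORED)  # must be first and uncompressed
    info = zipfile.ZipInfo('META-INF/')
    # Unix mode lives in the high 16 bits; 0x10 is the MS-DOS directory flag
    info.external_attr = (0o40755 << 16) | 0x10
    zf.writestr(info, '')
    zf.writestr('META-INF/container.xml', CONTAINER)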
@ -1136,7 +1136,8 @@ class BinaryRecord(object): # {{{
self.raw = record.raw self.raw = record.raw
sig = self.raw[:4] sig = self.raw[:4]
name = '%06d'%idx name = '%06d'%idx
if sig in (b'FCIS', b'FLIS', b'SRCS', b'DATP'): if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
b'FDST', b'AUDI', b'VIDE',}:
name += '-' + sig.decode('ascii') name += '-' + sig.decode('ascii')
elif sig == b'\xe9\x8e\r\n': elif sig == b'\xe9\x8e\r\n':
name += '-' + 'EOF' name += '-' + 'EOF'
@ -325,6 +325,7 @@ class MobiMLizer(object):
elem.text = None elem.text = None
elem.set('id', id_) elem.set('id', id_)
elem.tail = tail elem.tail = tail
elem.tag = XHTML('a')
else: else:
return return
tag = barename(elem.tag) tag = barename(elem.tag)
@ -502,6 +502,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace('> <', '>\n<')
self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:') self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html) self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html)
self.processed_html = re.sub(r'<(/?)o:p', r'<\1p', self.processed_html)
# Swap inline and block level elements, and order block level elements according to priority # Swap inline and block level elements, and order block level elements according to priority
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html) self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
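The <o:p> mapping added above is a one-line regex fix for Microsoft Office namespace residue in broken MOBI markup; stand-alone it behaves like this:

import re
print(re.sub(r'<(/?)o:p', r'<\1p', '<o:p>Broken Word markup</o:p>'))
# -> <p>Broken Word markup</p>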
@ -974,7 +975,7 @@ class MobiReader(object):
processed_records.append(i) processed_records.append(i)
data = self.sections[i][0] data = self.sections[i][0]
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
b'RESC', b'BOUN', b'FDST', b'DATP'}: b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
# A FLIS, FCIS, SRCS or EOF record, ignore # A FLIS, FCIS, SRCS or EOF record, ignore
continue continue
buf = cStringIO.StringIO(data) buf = cStringIO.StringIO(data)
@ -942,7 +942,13 @@ class Manifest(object):
if isinstance(data, etree._Element): if isinstance(data, etree._Element):
ans = xml2str(data, pretty_print=self.oeb.pretty_print) ans = xml2str(data, pretty_print=self.oeb.pretty_print)
if self.media_type in OEB_DOCS: if self.media_type in OEB_DOCS:
ans = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ans) # Convert self closing div|span|a tags to normally closed
# ones, as they are interpreted incorrectly by some browser
# based renderers
ans = re.sub(
# tag name followed by either a space or a /
r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>',
r'<\g<tag>\g<arg>></\g<tag>>', ans)
return ans return ans
if isinstance(data, unicode): if isinstance(data, unicode):
return data.encode('utf-8') return data.encode('utf-8')
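The new lookahead matters because the old pattern let the tag name run into following characters. A quick standalone comparison (patterns and replacement verbatim from the hunk above):

import re

old = re.compile(r'<(div|a|span)([^>]*)/>')
new = re.compile(r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>')

src = '<divider/> <div class="x"/> <a/>'
print(old.sub(r'<\1\2></\1>', src))
# <divider></div> <div class="x"></div> <a></a>   (corrupts <divider/>)
print(new.sub(r'<\g<tag>\g<arg>></\g<tag>>', src))
# <divider/> <div class="x"></div> <a></a>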
@ -4,6 +4,7 @@
### ###
Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net> Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License Released under the GPLv3 License
Based on code originally written by Peter Sorotkin (epubcfi.js)
### ###
# #
log = (error) -> log = (error) ->
@ -159,6 +160,63 @@ class CanonicalFragmentIdentifier
point = {} point = {}
error = null error = null
offset = null
if (r = cfi.match(/^:(\d+)/)) != null
# Character offset
offset = parseInt(r[1])
cfi = cfi.substr(r[0].length)
if (r = cfi.match(/^~(-?\d+(\.\d+)?)/)) != null
# Temporal offset
point.time = r[1] - 0 # Coerce to number
cfi = cfi.substr(r[0].length)
if (r = cfi.match(/^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)/)) != null
# Spatial offset
point.x = r[1] - 0 # Coerce to number
point.y = r[3] - 0 # Coerce to number
cfi = cfi.substr(r[0].length)
if( (r = cfi.match(/^\[([^\]]+)\]/)) != null )
assertion = r[1]
cfi = cfi.substr(r[0].length)
if (r = assertion.match(/;s=([ab])$/)) != null
if r.index > 0 and assertion[r.index - 1] != '^'
assertion = assertion.substr(0, r.index)
point.forward = (r[1] == 'a')
assertion = unescape_from_cfi(assertion)
# TODO: Handle text assertion
# Find the text node that contains the offset
node?.parentNode?.normalize()
if offset != null
while true
len = node.nodeValue.length
if offset < len or (not point.forward and offset == len)
break
next = false
while true
nn = node.nextSibling
if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata
next = nn
break
if not next
if offset > len
error = "Offset out of range: #{ offset }"
offset = len
break
node = next
offset -= len
point.offset = offset
point.node = node
if error
point.error = error
else if cfi.length > 0
point.error = "Undecoded CFI: #{ cfi }"
log(point.error)
point point
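For readers who don't read CoffeeScript, the tail-decoding regexes added above translate to roughly the following Python sketch (parsing only; it does not walk text nodes or handle the bracketed assertions):

import re

def parse_cfi_tail(cfi):
    point = {}
    m = re.match(r':(\d+)', cfi)
    if m:  # character offset
        point['offset'] = int(m.group(1))
        cfi = cfi[m.end():]
    m = re.match(r'~(-?\d+(\.\d+)?)', cfi)
    if m:  # temporal offset, e.g. a position in an <audio> element
        point['time'] = float(m.group(1))
        cfi = cfi[m.end():]
    m = re.match(r'@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)', cfi)
    if m:  # spatial offset, percentages of the target's box
        point['x'], point['y'] = float(m.group(1)), float(m.group(3))
        cfi = cfi[m.end():]
    return point, cfi  # any undecoded remainder is an error, as above

print(parse_cfi_tail(':12~3.5@25,80'))
# -> ({'offset': 12, 'time': 3.5, 'x': 25.0, 'y': 80.0}, '')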
@ -192,7 +250,7 @@ class CanonicalFragmentIdentifier
cdoc = cd cdoc = cd
cwin = cdoc.defaultView cwin = cdoc.defaultView
target.normalize() (if target.parentNode then target.parentNode else target).normalize()
if name in ['audio', 'video'] if name in ['audio', 'video']
tail = "~" + fstr target.currentTime tail = "~" + fstr target.currentTime
@ -214,6 +272,67 @@ class CanonicalFragmentIdentifier
this.encode(doc, target, offset, tail) this.encode(doc, target, offset, tail)
# }}} # }}}
point: (cfi, doc=window?.document) -> # {{{
r = this.decode(cfi, doc)
if not r
return null
node = r.node
ndoc = node.ownerDocument
if not ndoc
log("CFI node has no owner document: #{ cfi } #{ node }")
return null
nwin = ndoc.defaultView
x = null
y = null
if typeof(r.offset) == "number"
# Character offset
range = ndoc.createRange()
if r.forward
try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
else
try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
k = 0
a = null
rects = null
node_len = node.nodeValue.length
until (rects and rects.length) or k >= try_list.length
t = try_list[k++]
start_offset = r.offset + t.start
end_offset = r.offset + t.end
a = t.a
if start_offset < 0 or end_offset >= node_len
continue
range.setStart(node, start_offset)
range.setEnd(node, end_offset)
rects = range.getClientRects()
if not rects or not rects.length
log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
return null
rect = rects[0]
x = (a*rect.left + (1-a)*rect.right)
y = (rect.top + rect.bottom)/2
else
x = node.offsetLeft - nwin.scrollX
y = node.offsetTop - nwin.scrollY
if typeof(r.x) == "number" and node.offsetWidth
x += (r.x*node.offsetWidth)/100
y += (r.y*node.offsetHeight)/100
until ndoc == doc
node = nwin.frameElement
ndoc = node.ownerDocument
nwin = ndoc.defaultView
x += node.offsetLeft - nwin.scrollX
y += node.offsetTop - nwin.scrollY
{x:x, y:y, node:r.node, time:r.time}
# }}}
if window? if window?
window.cfi = new CanonicalFragmentIdentifier() window.cfi = new CanonicalFragmentIdentifier()
else if process? else if process?
@ -2,9 +2,9 @@
<html> <html>
<head> <head>
<title>Testing CFI functionality</title> <title>Testing CFI functionality</title>
<script type="text/javascript" src="cfi.js"></script> <script type="text/javascript" src="../cfi.coffee"></script>
<script type="text/javascript" src="jquery.js"></script> <script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="cfi-test.js"></script> <script type="text/javascript" src="cfi-test.coffee"></script>
</head> </head>
<body> <body>
<h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1> <h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>
@ -18,8 +18,8 @@ except ImportError:
def run_devel_server(): def run_devel_server():
os.chdir(os.path.dirname(__file__)) os.chdir(os.path.dirname(os.path.abspath(__file__)))
serve(['../cfi.coffee', 'cfi-test.coffee']) serve()
if __name__ == '__main__': if __name__ == '__main__':
run_devel_server() run_devel_server()
@ -17,6 +17,7 @@ from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True) RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
XHTML_NS = 'http://www.w3.org/1999/xhtml' XHTML_NS = 'http://www.w3.org/1999/xhtml'
XMLNS_NS = 'http://www.w3.org/2000/xmlns/'
class NotHTML(Exception): class NotHTML(Exception):
@ -28,9 +29,7 @@ def barename(name):
return name.rpartition('}')[-1] return name.rpartition('}')[-1]
def namespace(name): def namespace(name):
if '}' in name: return name.rpartition('}')[0][1:]
return name.split('}', 1)[0][1:]
return ''
def XHTML(name): def XHTML(name):
return '{%s}%s' % (XHTML_NS, name) return '{%s}%s' % (XHTML_NS, name)
@ -60,26 +59,86 @@ def merge_multiple_html_heads_and_bodies(root, log=None):
log.warn('Merging multiple <head> and <body> sections') log.warn('Merging multiple <head> and <body> sections')
return root return root
def _html5_parse(data): def clone_element(elem, nsmap={}, in_context=True):
if in_context:
maker = elem.getroottree().getroot().makeelement
else:
maker = etree.Element
nelem = maker(elem.tag, attrib=elem.attrib,
nsmap=nsmap)
nelem.text, nelem.tail = elem.text, elem.tail
nelem.extend(elem)
return nelem
def html5_parse(data):
import html5lib import html5lib
data = html5lib.parse(data, treebuilder='lxml').getroot() data = html5lib.parse(data, treebuilder='lxml').getroot()
html_ns = [ns for ns, val in data.nsmap.iteritems() if (val == XHTML_NS and # Set lang correctly
ns is not None)] xl = data.attrib.pop('xmlU0003Alang', None)
if html_ns: if xl is not None and 'lang' not in data.attrib:
# html5lib causes the XHTML namespace to not data.attrib['lang'] = xl
# be set as the default namespace
nsmap = dict(data.nsmap) # html5lib has the most inelegant handling of namespaces I have ever seen
nsmap[None] = XHTML_NS # Try to reconstitute destroyed namespace info
for x in html_ns: xmlns_declaration = '{%s}'%XMLNS_NS
nsmap.pop(x) non_html5_namespaces = {}
nroot = etree.Element(data.tag, nsmap=nsmap, seen_namespaces = set()
attrib=dict(data.attrib)) for elem in tuple(data.iter()):
nroot.text = data.text elem.attrib.pop('xmlns', None)
nroot.tail = data.tail namespaces = {}
for child in data: for x in tuple(elem.attrib):
nroot.append(child) if x.startswith('xmlnsU') or x.startswith(xmlns_declaration):
data = nroot # A namespace declaration
return data val = elem.attrib.pop(x)
if x.startswith('xmlnsU0003A'):
prefix = x[11:]
namespaces[prefix] = val
if namespaces:
# Some destroyed namespace declarations were found
p = elem.getparent()
if p is None:
# We handle the root node later
non_html5_namespaces = namespaces
else:
idx = p.index(elem)
p.remove(elem)
elem = clone_element(elem, nsmap=namespaces)
p.insert(idx, elem)
b = barename(elem.tag)
idx = b.find('U0003A')
if idx > -1:
prefix, tag = b[:idx], b[idx+6:]
ns = elem.nsmap.get(prefix, None)
if ns is None:
ns = non_html5_namespaces.get(prefix, None)
if ns is not None:
elem.tag = '{%s}%s'%(ns, tag)
for b in tuple(elem.attrib):
idx = b.find('U0003A')
if idx > -1:
prefix, tag = b[:idx], b[idx+6:]
ns = elem.nsmap.get(prefix, None)
if ns is None:
ns = non_html5_namespaces.get(prefix, None)
if ns is not None:
elem.attrib['{%s}%s'%(ns, tag)] = elem.attrib.pop(b)
seen_namespaces |= set(elem.nsmap.itervalues())
nsmap = dict(html5lib.constants.namespaces)
nsmap[None] = nsmap.pop('html')
non_html5_namespaces.update(nsmap)
nsmap = non_html5_namespaces
data = clone_element(data, nsmap=nsmap, in_context=False)
# Remove unused namespace declarations
fnsmap = {k:v for k,v in nsmap.iteritems() if v in seen_namespaces and v !=
XMLNS_NS}
return clone_element(data, nsmap=fnsmap, in_context=False)
def _html4_parse(data, prefer_soup=False): def _html4_parse(data, prefer_soup=False):
if prefer_soup: if prefer_soup:
@ -177,7 +236,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
except etree.XMLSyntaxError: except etree.XMLSyntaxError:
log.debug('Parsing %s as HTML' % filename) log.debug('Parsing %s as HTML' % filename)
try: try:
data = _html5_parse(data) data = html5_parse(data)
except: except:
log.exception( log.exception(
'HTML 5 parsing failed, falling back to older parsers') 'HTML 5 parsing failed, falling back to older parsers')
@ -261,6 +320,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
nroot.append(elem) nroot.append(elem)
data = nroot data = nroot
data = merge_multiple_html_heads_and_bodies(data, log) data = merge_multiple_html_heads_and_bodies(data, log)
# Ensure has a <head/> # Ensure has a <head/>
head = xpath(data, '/h:html/h:head') head = xpath(data, '/h:html/h:head')
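The U0003A strings above are html5lib's flattening of ':' (U+003A) in namespaced names. A toy distillation of the prefix-recovery step; the real code must also rebuild elements via clone_element, since lxml does not allow editing an element's nsmap in place:

MANGLED = 'U0003A'  # what html5lib's lxml treebuilder leaves behind for ':'

def unmangle(name, nsmap):
    idx = name.find(MANGLED)
    if idx > -1:
        prefix, local = name[:idx], name[idx + len(MANGLED):]
        ns = nsmap.get(prefix)
        if ns is not None:
            return '{%s}%s' % (ns, local)  # Clark notation, as lxml expects
    return name

print(unmangle('svgU0003Acircle', {'svg': 'http://www.w3.org/2000/svg'}))
# -> {http://www.w3.org/2000/svg}circle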
@ -159,15 +159,18 @@ class FlatFilenames(object): # {{{
continue continue
data = item.data data = item.data
isp = item.spine_position
nhref = oeb.manifest.generate(href=nhref)[1] nhref = oeb.manifest.generate(href=nhref)[1]
if isp is not None:
oeb.spine.remove(item)
oeb.manifest.remove(item)
nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data, nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
fallback=item.fallback) fallback=item.fallback)
self.rename_map[item.href] = nhref self.rename_map[item.href] = nhref
self.renamed_items_map[nhref] = item self.renamed_items_map[nhref] = item
if item.spine_position is not None: if isp is not None:
oeb.spine.insert(item.spine_position, nitem, item.linear) oeb.spine.insert(isp, nitem, item.linear)
oeb.spine.remove(item)
oeb.manifest.remove(item)
if self.rename_map: if self.rename_map:
self.log('Found non-flat filenames, renaming to support broken' self.log('Found non-flat filenames, renaming to support broken'
@ -16,7 +16,7 @@ class UnsmartenPunctuation(object):
def unsmarten(self, root): def unsmarten(self, root):
for x in self.html_tags(root): for x in self.html_tags(root):
if not barename(x) == 'pre': if not barename(x.tag) == 'pre':
if getattr(x, 'text', None): if getattr(x, 'text', None):
x.text = unsmarten_text(x.text) x.text = unsmarten_text(x.text)
if getattr(x, 'tail', None) and x.tail: if getattr(x, 'tail', None) and x.tail:
@ -273,11 +273,34 @@ def error_dialog(parent, title, msg, det_msg='', show=False,
return d return d
def question_dialog(parent, title, msg, det_msg='', show_copy_button=False, def question_dialog(parent, title, msg, det_msg='', show_copy_button=False,
default_yes=True): default_yes=True,
# Skippable dialogs
# Set skip_dialog_name to a unique name for this dialog
# Set skip_dialog_msg to a message displayed to the user
skip_dialog_name=None, skip_dialog_msg=_('Show this confirmation again'),
skip_dialog_skipped_value=True, skip_dialog_skip_precheck=True):
from calibre.gui2.dialogs.message_box import MessageBox from calibre.gui2.dialogs.message_box import MessageBox
auto_skip = set(gprefs.get('questions_to_auto_skip', []))
if (skip_dialog_name is not None and skip_dialog_name in auto_skip):
return bool(skip_dialog_skipped_value)
d = MessageBox(MessageBox.QUESTION, title, msg, det_msg, parent=parent, d = MessageBox(MessageBox.QUESTION, title, msg, det_msg, parent=parent,
show_copy_button=show_copy_button, default_yes=default_yes) show_copy_button=show_copy_button, default_yes=default_yes)
return d.exec_() == d.Accepted
if skip_dialog_name is not None and skip_dialog_msg:
tc = d.toggle_checkbox
tc.setVisible(True)
tc.setText(skip_dialog_msg)
tc.setChecked(bool(skip_dialog_skip_precheck))
ret = d.exec_() == d.Accepted
if skip_dialog_name is not None and not d.toggle_checkbox.isChecked():
auto_skip.add(skip_dialog_name)
gprefs.set('questions_to_auto_skip', list(auto_skip))
return ret
def info_dialog(parent, title, msg, det_msg='', show=False, def info_dialog(parent, title, msg, det_msg='', show=False,
show_copy_button=True): show_copy_button=True):
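The new skip_dialog_* machinery boils down to a small persistence pattern around the dialog. A sketch with illustrative names (prefs stands in for gprefs, ask_user for actually showing the MessageBox):

def question(prefs, skip_dialog_name, ask_user, skipped_value=True):
    auto_skip = set(prefs.get('questions_to_auto_skip', []))
    if skip_dialog_name in auto_skip:
        return skipped_value  # user previously opted out of this question
    answer, show_again = ask_user()  # show_again mirrors the checkbox state
    if not show_again:  # "Show this confirmation again" left unchecked
        auto_skip.add(skip_dialog_name)
        prefs['questions_to_auto_skip'] = list(auto_skip)
    return answer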
@ -683,7 +683,7 @@ class DeviceMixin(object): # {{{
return self.ask_a_yes_no_question( return self.ask_a_yes_no_question(
_('No suitable formats'), msg, _('No suitable formats'), msg,
ans_when_user_unavailable=True, ans_when_user_unavailable=True,
det_msg=autos det_msg=autos, skip_dialog_name='auto_convert_before_send'
) )
def set_default_thumbnail(self, height): def set_default_thumbnail(self, height):
@ -12,7 +12,7 @@ from PyQt4.Qt import QDialog, QApplication
from calibre.gui2.dialogs.add_from_isbn_ui import Ui_Dialog from calibre.gui2.dialogs.add_from_isbn_ui import Ui_Dialog
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre.gui2 import gprefs from calibre.gui2 import gprefs, question_dialog, error_dialog
class AddFromISBN(QDialog, Ui_Dialog): class AddFromISBN(QDialog, Ui_Dialog):
@ -44,6 +44,7 @@ class AddFromISBN(QDialog, Ui_Dialog):
tags = list(filter(None, [x.strip() for x in tags])) tags = list(filter(None, [x.strip() for x in tags]))
gprefs['add from ISBN tags'] = tags gprefs['add from ISBN tags'] = tags
self.set_tags = tags self.set_tags = tags
bad = set()
for line in unicode(self.isbn_box.toPlainText()).strip().splitlines(): for line in unicode(self.isbn_box.toPlainText()).strip().splitlines():
line = line.strip() line = line.strip()
if not line: if not line:
@ -64,5 +65,19 @@ class AddFromISBN(QDialog, Ui_Dialog):
os.access(parts[1], os.R_OK) and os.path.isfile(parts[1]): os.access(parts[1], os.R_OK) and os.path.isfile(parts[1]):
book['path'] = parts[1] book['path'] = parts[1]
self.books.append(book) self.books.append(book)
else:
bad.add(parts[0])
if bad:
if self.books:
if not question_dialog(self, _('Some invalid ISBNs'),
_('Some of the ISBNs you entered were invalid. They will'
' be ignored. Click Show Details to see which ones.'
' Do you want to proceed?'), det_msg='\n'.join(bad),
show_copy_button=True):
return
else:
return error_dialog(self, _('All invalid ISBNs'),
_('All the ISBNs you entered were invalid. No books'
' can be added.'), show=True)
QDialog.accept(self, *args) QDialog.accept(self, *args)
@ -44,6 +44,7 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
self.msg.setText(msg) self.msg.setText(msg)
self.det_msg.setPlainText(det_msg) self.det_msg.setPlainText(det_msg)
self.det_msg.setVisible(False) self.det_msg.setVisible(False)
self.toggle_checkbox.setVisible(False)
if show_copy_button: if show_copy_button:
self.ctc_button = self.bb.addButton(_('&Copy to clipboard'), self.ctc_button = self.bb.addButton(_('&Copy to clipboard'),
@ -53,7 +53,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0" colspan="2"> <item row="3" column="0" colspan="2">
<widget class="QDialogButtonBox" name="bb"> <widget class="QDialogButtonBox" name="bb">
<property name="orientation"> <property name="orientation">
<enum>Qt::Horizontal</enum> <enum>Qt::Horizontal</enum>
@ -63,6 +63,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="toggle_checkbox">
<property name="text">
<string/>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources> <resources>
@ -143,12 +143,12 @@ class GuiRunner(QObject):
add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False) add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
sys.excepthook = main.unhandled_exception sys.excepthook = main.unhandled_exception
if len(self.args) > 1: if len(self.args) > 1:
p = os.path.abspath(self.args[1]) files = [os.path.abspath(p) for p in self.args[1:] if not
if os.path.isdir(p): os.path.isdir(p)]
prints('Ignoring directory passed as command line argument:', if len(files) < len(sys.argv[1:]):
self.args[1]) prints('Ignoring directories passed as command line arguments')
else: if files:
add_filesystem_book(p) add_filesystem_book(files)
self.app.file_event_hook = add_filesystem_book self.app.file_event_hook = add_filesystem_book
self.main = main self.main = main
@ -162,6 +162,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for key in dynamic.keys(): for key in dynamic.keys():
if key.endswith('_again') and dynamic[key] is False: if key.endswith('_again') and dynamic[key] is False:
dynamic[key] = True dynamic[key] = True
gprefs['questions_to_auto_skip'] = []
info_dialog(self, _('Done'), info_dialog(self, _('Done'),
_('Confirmation dialogs have all been reset'), show=True) _('Confirmation dialogs have all been reset'), show=True)
@ -260,11 +260,11 @@ class ShortcutConfig(QWidget):
self.view.setModel(model) self.view.setModel(model)
self.delegate = Delegate() self.delegate = Delegate()
self.view.setItemDelegate(self.delegate) self.view.setItemDelegate(self.delegate)
self.delegate.sizeHintChanged.connect(self.scrollTo) self.delegate.sizeHintChanged.connect(self.scrollTo,
type=Qt.QueuedConnection)
def scrollTo(self, index): def scrollTo(self, index):
self.view.scrollTo(index) self.view.scrollTo(index, self.view.EnsureVisible)
@property @property
def is_editing(self): def is_editing(self):
@ -54,12 +54,12 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
if not id: if not id:
continue continue
cover_url = ''.join(data.xpath('.//img/@src')) cover_url = ''.join(data.xpath('.//div[@class="info"]/h3/a/@id'))
title = ''.join(data.xpath('.//div[@class="info"]/h3/a/@title')) title = ''.join(data.xpath('.//div[@class="info"]/h3/a/@title'))
formats = title.split() formats = ''.join(data.xpath('.//div[@class="info"]/p[1]/text()'))
formats = formats[-1] formats = re.findall(r'\((.*?)\)',formats)[0]
author = ''.join(data.xpath('.//div[@class="info"]/h4/text() | .//div[@class="info"]/h4/span/text()')) author = ''.join(data.xpath('.//div[@class="info"]/h4/text() | .//div[@class="info"]/h4/span/text()'))
price = ''.join(data.xpath('.//h3[@class="promocja"]/text()')) price = ''.join(data.xpath('.//div[@class="options"]/h3/text()'))
price = re.sub('PLN', '', price) price = re.sub('PLN', '', price)
price = re.sub('\.', ',', price) price = re.sub('\.', ',', price)
drm = data.xpath('boolean(.//div[@class="info" and contains(., "Zabezpieczenie: DRM")])') drm = data.xpath('boolean(.//div[@class="info" and contains(., "Zabezpieczenie: DRM")])')
@ -67,7 +67,7 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
counter -= 1 counter -= 1
s = SearchResult() s = SearchResult()
s.cover_url = cover_url s.cover_url = 'http://imguser.gandalf.com.pl/' + re.sub('p', 'p_', cover_url) + '.jpg'
s.title = title.strip() s.title = title.strip()
s.author = author.strip() s.author = author.strip()
s.price = price s.price = price
@ -407,11 +407,14 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
return getattr(self, '__systray_minimized', False) return getattr(self, '__systray_minimized', False)
def ask_a_yes_no_question(self, title, msg, det_msg='', def ask_a_yes_no_question(self, title, msg, det_msg='',
show_copy_button=False, ans_when_user_unavailable=True): show_copy_button=False, ans_when_user_unavailable=True,
skip_dialog_name=None, skipped_value=True):
if self.is_minimized_to_tray: if self.is_minimized_to_tray:
return ans_when_user_unavailable return ans_when_user_unavailable
return question_dialog(self, title, msg, det_msg=det_msg, return question_dialog(self, title, msg, det_msg=det_msg,
show_copy_button=show_copy_button) show_copy_button=show_copy_button,
skip_dialog_name=skip_dialog_name,
skip_dialog_skipped_value=skipped_value)
def hide_windows(self): def hide_windows(self):
for window in QApplication.topLevelWidgets(): for window in QApplication.topLevelWidgets():
@ -269,7 +269,7 @@ How do I use |app| with my Android phone/tablet?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are two ways that you can connect your Android device to calibre. Using a USB cable-- or wirelessly, over the air. There are two ways that you can connect your Android device to calibre. Using a USB cable-- or wirelessly, over the air.
**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**. **The USB cable method only works if your Android device can act as a USB disk, that means in Windows it must have a drive letter, like K:**. Some devices may have a setting to put them in "disk mode" or "USB Transfer mode" that must be enabled before they act as USB disks.
Using a USB cable Using a USB cable
^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^

File diff suppressed because it is too large (9 files)

@ -4,9 +4,9 @@
# #
msgid "" msgid ""
msgstr "" msgstr ""
"Project-Id-Version: calibre 0.8.31\n" "Project-Id-Version: calibre 0.8.32\n"
"POT-Creation-Date: 2011-12-16 09:38+IST\n" "POT-Creation-Date: 2011-12-23 08:40+IST\n"
"PO-Revision-Date: 2011-12-16 09:38+IST\n" "PO-Revision-Date: 2011-12-23 08:40+IST\n"
"Last-Translator: Automatically generated\n" "Last-Translator: Automatically generated\n"
"Language-Team: LANGUAGE\n" "Language-Team: LANGUAGE\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
@ -24,8 +24,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/db/cache.py:105 #: /home/kovid/work/calibre/src/calibre/db/cache.py:105
#: /home/kovid/work/calibre/src/calibre/db/cache.py:108 #: /home/kovid/work/calibre/src/calibre/db/cache.py:108
#: /home/kovid/work/calibre/src/calibre/db/cache.py:119 #: /home/kovid/work/calibre/src/calibre/db/cache.py:119
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:282 #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:285
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:283 #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:286
#: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:99 #: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:99
#: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:100 #: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:100
#: /home/kovid/work/calibre/src/calibre/devices/jetbook/driver.py:74 #: /home/kovid/work/calibre/src/calibre/devices/jetbook/driver.py:74
@ -36,8 +36,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:71 #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:71
#: /home/kovid/work/calibre/src/calibre/devices/prs500/books.py:267 #: /home/kovid/work/calibre/src/calibre/devices/prs500/books.py:267
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:660 #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:660
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:328 #: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:330
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:329 #: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:331
#: /home/kovid/work/calibre/src/calibre/devices/usbms/driver.py:485 #: /home/kovid/work/calibre/src/calibre/devices/usbms/driver.py:485
#: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:106 #: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:106
#: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:109 #: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:109
@ -97,17 +97,17 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:85 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:85
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:128 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:128
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:169 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:169
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:748 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:749
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1005 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1006
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1007 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1008
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1009 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1010
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/utils.py:299 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/utils.py:299
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/writer2/indexer.py:497 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/writer2/indexer.py:497
#: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:145 #: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:145
#: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:147 #: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:147
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:818 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:818
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:273 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:333
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:277 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:337
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:142 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:142
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:149 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:149
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:66 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:66
@ -137,8 +137,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/pdf/writer.py:103 #: /home/kovid/work/calibre/src/calibre/ebooks/pdf/writer.py:103
#: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:320 #: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:320
#: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:322 #: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:322
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:387 #: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:410
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:395 #: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:418
#: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:157 #: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:157
#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:378 #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:378
#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:381 #: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:381
@ -889,15 +889,15 @@ msgstr ""
msgid "Communicate with Android phones." msgid "Communicate with Android phones."
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:152 #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:154
msgid "Comma separated list of directories to send e-books to on the device. The first one that exists will be used" msgid "Comma separated list of directories to send e-books to on the device. The first one that exists will be used"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:222 #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:225
msgid "Communicate with S60 phones." msgid "Communicate with S60 phones."
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:241 #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:244
msgid "Communicate with WebOS tablets." msgid "Communicate with WebOS tablets."
msgstr "" msgstr ""
@ -993,8 +993,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:102 #: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:102
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:447 #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:447
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:470 #: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:470
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:547 #: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:549
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:566 #: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:568
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1052 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1052
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1058 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1058
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1093 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1093
@ -2036,7 +2036,7 @@ msgid "When creating a TOC from links in the input document, allow duplicate ent
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:281 #: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:281
msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\" or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature." msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\", \"prologue\", \"epilogue\", or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature."
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:295 #: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:295
@ -2934,7 +2934,7 @@ msgid ""
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1417 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1417
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1238 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:943 #: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:943
#: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:41 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:41
msgid "Cover" msgid "Cover"
@ -3085,70 +3085,70 @@ msgstr ""
msgid "No details available" msgid "No details available"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1239 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245
msgid "Title Page" msgid "Title Page"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1240 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/htmltoc.py:15 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/htmltoc.py:15
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main.py:56 #: /home/kovid/work/calibre/src/calibre/gui2/viewer/main.py:56
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:199 #: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:199
msgid "Table of Contents" msgid "Table of Contents"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1241 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247
msgid "Index" msgid "Index"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1242 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248
msgid "Glossary" msgid "Glossary"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1243 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249
msgid "Acknowledgements" msgid "Acknowledgements"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250
msgid "Bibliography" msgid "Bibliography"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251
msgid "Colophon" msgid "Colophon"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252
msgid "Copyright" msgid "Copyright"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253
msgid "Dedication" msgid "Dedication"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254
msgid "Epigraph" msgid "Epigraph"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1255
msgid "Foreword" msgid "Foreword"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1256
msgid "List of Illustrations" msgid "List of Illustrations"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1257
msgid "List of Tables" msgid "List of Tables"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1258
msgid "Notes" msgid "Notes"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1259
msgid "Preface" msgid "Preface"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
msgid "Main Text" msgid "Main Text"
msgstr "" msgstr ""
@ -3681,7 +3681,11 @@ msgstr ""
msgid "tag browser categories not to display" msgid "tag browser categories not to display"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:506 #: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:280
msgid "Show this confirmation again"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:529
msgid "Choose Files" msgid "Choose Files"
msgstr "" msgstr ""
@ -4186,7 +4190,7 @@ msgid "Create a catalog of the books in your calibre library"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/actions/convert.py:88 #: /home/kovid/work/calibre/src/calibre/gui2/actions/convert.py:88
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:591 #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:594
msgid "Cannot convert" msgid "Cannot convert"
msgstr "" msgstr ""
@ -6643,7 +6647,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/single_ui.py:117 #: /home/kovid/work/calibre/src/calibre/gui2/convert/single_ui.py:117
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info_ui.py:69 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info_ui.py:69
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf_ui.py:96 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf_ui.py:96
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:52 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:21
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/progress_ui.py:53 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/progress_ui.py:53
#: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:61 #: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:61
#: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_progress_dialog_ui.py:50 #: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_progress_dialog_ui.py:50
@ -7279,6 +7283,22 @@ msgstr ""
msgid "Reset author to Unknown" msgid "Reset author to Unknown"
msgstr "" msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:72
msgid "Some invalid ISBNs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:73
msgid "Some of the ISBNs you entered were invalid. They will be ignored. Click Show Details to see which ones. Do you want to proceed?"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:79
msgid "All invalid ISBNs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:80
msgid "All the ISBNs you entered were invalid. No books can be added."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn_ui.py:63 #: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn_ui.py:63
msgid "Add books by ISBN" msgid "Add books by ISBN"
msgstr "" msgstr ""
@@ -7920,35 +7940,35 @@ msgstr ""
msgid "&Hide all jobs"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:49
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:50
msgid "&Copy to clipboard"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:53
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
msgid "Show &details"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:55
msgid "Hide &details"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:58
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:59
msgid "Show detailed information about this error"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:100
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:101
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/__init__.py:552
msgid "Copied"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:138
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:139
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:796
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:205
msgid "Copy to clipboard"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:184
-#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:232
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:185
+#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:233
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:860
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:961
msgid "View log"
@@ -10063,7 +10083,7 @@ msgid "None"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/keyboard.py:389
-#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:165
+#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
msgid "Done"
msgstr ""
@@ -10404,7 +10424,7 @@ msgid "Failed to create calibre library at: %r."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:108
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:170
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:169
msgid "Choose a location for your new calibre e-book library"
msgstr ""
@@ -10412,74 +10432,74 @@ msgstr ""
msgid "Initializing user interface..."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:163
msgid "Repairing failed"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:165
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
msgid "The database repair failed. Starting with a new empty library."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:178
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:203
msgid "Bad database location"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:180
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
#, python-format
msgid "Bad database location %r. calibre will now quit."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:494
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:191
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:497
msgid "Corrupted database"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:193
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
#, python-format
msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful. If you say No, a new empty calibre library will be created."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:205
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
#, python-format
msgid "Bad database location %r. Will start with a new, empty calibre library"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:215
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:214
#, python-format
msgid "Starting %s: Loading books..."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:295
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:294
msgid "If you are sure it is not running"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:298
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:297
msgid "may be running in the system tray, in the"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:300
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:299
msgid "upper right region of the screen."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:302
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:301
msgid "lower right region of the screen."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:305
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:304
msgid "try rebooting your computer."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:307
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:321
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:306
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:320
msgid "try deleting the file"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:309
msgid "Cannot Start "
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/main.py:311
+#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
#, python-format
msgid "%s is already running."
msgstr ""
@@ -11097,7 +11117,7 @@ msgstr ""
msgid "All on 1 tab"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
+#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:167
msgid "Confirmation dialogs have all been reset"
msgstr ""
@@ -12623,7 +12643,7 @@ msgid "Here you can control how calibre will save your books when you click the
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:70
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:432
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:435
msgid "Failed to start content server"
msgstr ""
@@ -13791,39 +13811,39 @@ msgstr ""
msgid "You have started calibre in debug mode. After you quit calibre, the debug log will be available in the file: %s<p>The log will be displayed automatically."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:495
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:498
#, python-format
msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful."
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:579
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:582
msgid "Conversion Error"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:602
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:605
msgid "Recipe Disabled"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:618
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:621
msgid "<b>Failed</b>"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:652
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
msgid "There are active jobs. Are you sure you want to quit?"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:658
msgid ""
" is communicating with the device!<br>\n"
" Quitting may cause corruption on the device.<br>\n"
" Are you sure you want to quit?"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:659
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:662
msgid "Active jobs"
msgstr ""
-#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:727
+#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:730
msgid "will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray."
msgstr ""
20 file diffs suppressed because they are too large.

Some files were not shown because too many files have changed in this diff.