Merge from trunk

This commit is contained in:
Charles Haley 2011-12-23 09:13:13 +01:00
commit b7f2bb23df
147 changed files with 25886 additions and 21914 deletions


@ -19,6 +19,81 @@
# new recipes:
# - title:
- version: 0.8.32
date: 2011-12-23
new features:
- title: "Linux: When deleting books, send them to the recycle bin, instead of permanently deleting. This is the same behavior as on Windows and OS X."
- title: "Add a checkbox to allow users to disable the popup that asks if books should be auto-converted before sending to device"
- title: "Drivers for Droid Razr, Samsung GT-I9003 and Bookeen Odyssey"
tickets: [906356, 906056, 905862]
- title: "Allow passing multiple filenames as command line arguments to calibre, to add multiple books."
tickets: [907968]
bug fixes:
- title: "MOBI Output: Fix regression in 0.8.30 that caused the use of hidden heading elements for the TOC to generate links in the wrong place."
tickets: [907156]
- title: "EPUB Output: Ensure directories have the correct permissions bits set when unzipping an epub with unzip on Unix"
- title: "Fix bottom most shortcuts in keyboard shortcuts for viewer not editable"
- title: "EPUB Output: Fix handling of self closing <audio> tags."
tickets: [906521]
- title: "MOBI Input: Map invalid <o:p> tags to <p> tags before parsing, to handle broken nesting."
tickets: [905715]
- title: "Conversion pipeline: HTML5 parsing: Fix handling of XML namespaces. Fixes regression in 0.8.30 that caused some articles in some news downloads to appear blank when viewed in Adobe Digital Editions based readers"
- title: "Get Books: Gandalf store, fix price and cover detection"
- title: "EPUB Output: Fix the Flatten filenames option in EPUB Output causing duplicated manifest ids in rare cases."
tickets: [905692]
- title: "When adding books via ISBN, show the user the list of invalid ISBNs that will be ignored, if any, before starting the add operation."
tickets: [905690]
- title: "Fix unsmarten punctuation conversion option broken in 0.8.31."
tickets: [905596]
- title: "Fix broken evaluation of composite columns in save-to-disk"
improved recipes:
- Cosmopolitan UK
- Hindustan Times
- HVG
- moneynews.com
- Ming Pao
- Glasgow Herald
- Times of India
- Focus Magazine
- Hacker News
- Independent
- Sueddeutsche
new recipes:
- title: Prospect Magazine UK
author: Barty and duoloz
- title: Elet es Irodalom and NOL
author: Bigpapa
- title: Salonica Press News
author: SteliosGero
- title: Echo Online
author: Armin Geller
- title: Various Polish news sources
author: fenuks
- title: Various Italian news sources
author: faber1971
- version: 0.8.31
date: 2011-12-16


@ -1,19 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone'
__author__ = 'fenuks'
description = 'Adventure zone - adventure games from A to Z'
category = 'games'
language = 'pl'
oldest_article = 15
max_articles_per_feed = 100
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
use_embedded_content=False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
remove_tags_after= dict(name='td', attrs={'class':'main-body middle-border'})
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
remove_tags_after= dict(id='comments')
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self)
soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php')
tag=soup.find(name='channel')
titles=[]
for r in tag.findAll(name='image'):
r.extract()
art=tag.findAll(name='item')
for i in art:
titles.append(i.title.string)
for feed in feeds:
for article in feed.articles[:]:
article.title=titles[feed.articles.index(article)]
return feeds
def get_cover_url(self):
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
cover=soup.find(id='box_OstatninumerAZ')
@ -22,17 +41,10 @@ class Adventure_zone(BasicNewsRecipe):
def skip_ad_pages(self, soup):
skip_tag = soup.body.findAll(name='a')
if skip_tag is not None:
for r in skip_tag:
if 'articles.php?' in r['href']:
if r.strong is not None:
word=r.strong.string
if ('zapowied' or 'recenzj') in word:
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item_id'+r['href'][r['href'].find('_id')+3:], raw=True)
else:
None
def print_version(self, url):
return url.replace('news.php?readmore', 'print.php?type=N&item_id')
skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
skip_tag = skip_tag.findAll(name='a')
for r in skip_tag:
if r.strong:
word=r.strong.string
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)


@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AstroNEWS(BasicNewsRecipe):
title = u'AstroNEWS'
__author__ = 'fenuks'
@ -8,11 +7,16 @@ class AstroNEWS(BasicNewsRecipe):
language = 'pl'
oldest_article = 8
max_articles_per_feed = 100
auto_cleanup = True
#extra_css= 'table {text-align: left;}'
no_stylesheets=True
cover_url='http://news.astronet.pl/img/logo_news.jpg'
# no_stylesheets= True
remove_tags=[dict(name='hr')]
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
def print_version(self, url):
return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
def preprocess_html(self, soup):
for item in soup.findAll(align=True):
del item['align']
return soup

recipes/biolog_pl.recipe Normal file

@ -0,0 +1,19 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Biolog_pl(BasicNewsRecipe):
title = u'Biolog.pl'
oldest_article = 7
max_articles_per_feed = 100
remove_empty_feeds=True
__author__ = 'fenuks'
description = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
category = 'biology'
language = 'pl'
cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
no_stylesheets = True
#keeps_only_tags=[dict(id='main')]
remove_tags_before=dict(id='main')
remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
feeds = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]


@ -0,0 +1,44 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham post'
description = 'News for Birmingham UK'
timefmt = ''
__author__ = 'Dave Asbury'
cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
oldest_article = 1
max_articles_per_feed = 20
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
language = 'en_GB'
masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
keep_only_tags = [
#dict(name='h1',attrs={'id' : 'article-headline'}),
#dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
#dict(name='p')
#dict(attrs={'id' : 'three-col'})
]
remove_tags = [
# dict(name='div',attrs={'class' : 'span-33 last header-links'})
]
feeds = [
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
(u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
(u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
]
extra_css = '''
body {font: sans-serif medium;}'
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
span{ font-size:9.5px; font-weight:bold;font-style:italic}
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''


@ -0,0 +1,22 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Computerworld_pl(BasicNewsRecipe):
title = u'Computerworld.pl'
__author__ = 'fenuks'
description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
category = 'IT'
language = 'pl'
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'s'})]
remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
def get_cover_url(self):
soup = self.index_to_soup('http://www.computerworld.pl/')
cover=soup.find(name='img', attrs={'class':'prawo'})
self.cover_url=cover['src']
return getattr(self, 'cover_url', self.cover_url)


@ -7,6 +7,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
__author__ = 'Dave Asbury'
#last update 21/12/11
# greyscale code by Starson
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
no_stylesheets = True
@ -31,8 +32,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
]
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
dict(name='li',attrs={'class' : 'thumb'})
]
feeds = [
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
@ -48,4 +50,3 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
img.type = "GrayscaleType"
img.save(iurl)
return soup

recipes/datasport.recipe Normal file

@ -0,0 +1,15 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian soccer news website - v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324114272(BasicNewsRecipe):
title = u'Datasport'
language = 'it'
__author__ = 'faber1971'
oldest_article = 1
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'Datasport', u'http://www.datasport.it/calcio/rss.xml')]


@ -0,0 +1,58 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Dziennik_pl(BasicNewsRecipe):
title = u'Dziennik.pl'
__author__ = 'fenuks'
description = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
category = 'newspaper'
language = 'pl'
cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
remove_javascript=True
remove_empty_feeds=True
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: '')]
keep_only_tags=[dict(id='article')]
remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
(u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
(u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
(u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
(u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
(u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
(u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
(u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
def append_page(self, soup, appendtag):
tag=soup.find('a', attrs={'class':'page_next'})
if tag:
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
while tag:
soup2= self.index_to_soup(tag['href'])
tag=soup2.find('a', attrs={'class':'page_next'})
if not tag:
for r in appendtag.findAll('div', attrs={'class':'art_src'}):
r.extract()
pagetext = soup2.find(name='div', attrs={'class':'article_body'})
for dictionary in self.remove_tags:
v=pagetext.findAll(name=dictionary['name'], attrs=dictionary['attrs'])
for delete in v:
delete.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
if appendtag.find('div', attrs={'class':'article_paginator'}):
appendtag.find('div', attrs={'class':'article_paginator'}).extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
return soup


@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
'''
Fetch echo-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class Echo_Online(BasicNewsRecipe):
title = u' Echo Online'
description = '-Echo Online-'
publisher = 'Echo Online GmbH'
category = 'News, Germany'
__author__ = 'Armin Geller' # 2011-12-17
language = 'de'
lang = 'de-DE'
encoding = 'iso-8859-1'
timefmt = ' [%a, %d %b %Y]'
oldest_article = 7
max_articles_per_feed = 2
no_stylesheets = True
auto_cleanup = True
remove_javascript = True
feeds = [
(u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
(u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
(u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
(u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
(u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
(u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
(u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
(u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
(u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
(u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
(u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
]
def print_version(self, url):
return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'
remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
auto_cleanup_keep = '//div[@class="bild_gross w270"]'
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-ash2/41801_145340745513489_893927_n.jpg' # 2011-12-16 AGe
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif' # 2011-12-16 AGe


@ -0,0 +1,48 @@
################################################################################
#Description: http://es.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2010.12.01. - V1.0
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class elet_es_irodalom(BasicNewsRecipe):
title = u'Elet es Irodalom'
__author__ = 'Bigpapa'
oldest_article = 7
max_articles_per_feed = 20 # Maximum number of articles per feed stored in the generated e-book.
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'iso-8859-2'
category = 'Cikkek'
language = 'hu'
publication_type = 'newsportal'
extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
keep_only_tags = [
dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
]
remove_tags = [
dict(name='a', attrs={'target':['_TOP']}),
dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
]
feeds = [
(u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
(u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
(u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
(u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
(u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
(u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
(u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
(u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
(u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
]

recipes/emuzica_pl.recipe Normal file

@ -0,0 +1,16 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class eMuzyka(BasicNewsRecipe):
title = u'eMuzyka'
__author__ = 'fenuks'
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
category = 'music'
language = 'pl'
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
remove_tags=[dict(name='span', attrs={'id':'date'})]
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]

recipes/fisco_oggi.recipe Normal file

@ -0,0 +1,18 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Website of Italian Governament Income Agency (about revenue, taxation, taxes)- v1.00 (17, December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324112023(BasicNewsRecipe):
title = u'Fisco Oggi'
language = 'it'
__author__ = 'faber1971'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
remove_javascript = True
no_stylesheets = True
feeds = [(u'Attualit\xe0', u'http://www.fiscooggi.it/taxonomy/term/1/feed'), (u'Normativa', u'http://www.fiscooggi.it/taxonomy/term/5/feed'), (u'Giurisprudenza', u'http://www.fiscooggi.it/taxonomy/term/8/feed'), (u'Dati e statistiche', u'http://www.fiscooggi.it/taxonomy/term/12/feed'), (u'Analisi e commenti', u'http://www.fiscooggi.it/taxonomy/term/13/feed'), (u'Bilancio e contabilit\xe0', u'http://www.fiscooggi.it/taxonomy/term/576/feed'), (u'Dalle regioni', u'http://www.fiscooggi.it/taxonomy/term/16/feed'), (u'Dal mondo', u'http://www.fiscooggi.it/taxonomy/term/17/feed')]


@ -1,57 +1,68 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Focus_pl(BasicNewsRecipe):
title = u'Focus.pl'
oldest_article = 15
max_articles_per_feed = 100
__author__ = 'fenuks'
language = 'pl'
description ='polish scientific monthly magazine'
class FocusRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = u'intromatyk <intromatyk@gmail.com>'
language = 'pl'
version = 1
title = u'Focus'
publisher = u'Gruner + Jahr Polska'
category = u'News'
description = u'Newspaper'
category='magazine'
cover_url=''
remove_empty_feeds= True
no_stylesheets=True
remove_tags_before=dict(name='div', attrs={'class':'h2 h2f'})
remove_tags_after=dict(name='div', attrs={'class':'clear'})
feeds = [(u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
(u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
(u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
(u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
(u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
(u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
(u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
(u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
(u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
oldest_article = 7
max_articles_per_feed = 100000
recursions = 0
no_stylesheets = True
remove_javascript = True
encoding = 'utf-8'
# Seems to work best, but YMMV
simultaneous_downloads = 5
r = re.compile('.*(?P<url>http:\/\/(www.focus.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'cll'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulm noprint'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'txb'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'h2'}))
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'txu'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'ulc'}))
extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
h1{text-align: left;}
h2{font-size: medium; font-weight: bold;}
p.lead {font-weight: bold; text-align: left;}
.authordate {font-size: small; color: #696969;}
.fot{font-size: x-small; color: #666666;}
'''
]
feeds = [
('Nauka', 'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
('Historia', 'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
('Cywilizacja', 'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
('Sport', 'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
('Technika', 'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
('Przyroda', 'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
('Technologie', 'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
]
def skip_ad_pages(self, soup):
tag=soup.find(name='a')
if tag:
new_soup=self.index_to_soup(tag['href']+ 'do-druku/1/', raw=True)
return new_soup
def append_page(self, appendtag):
tag=appendtag.find(name='div', attrs={'class':'arrows'})
if tag:
nexturl='http://www.focus.pl/'+tag.a['href']
for rem in appendtag.findAll(name='div', attrs={'class':'klik-nav'}):
rem.extract()
while nexturl:
soup2=self.index_to_soup(nexturl)
nexturl=None
pagetext=soup2.find(name='div', attrs={'class':'txt'})
tag=pagetext.find(name='div', attrs={'class':'arrows'})
for r in tag.findAll(name='a'):
if u'Następne' in r.string:
nexturl='http://www.focus.pl/'+r['href']
for rem in pagetext.findAll(name='div', attrs={'class':'klik-nav'}):
rem.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
if ('advertisement' in soup.find('title').string.lower()):
href = soup.find('a').get('href')
return self.index_to_soup(href, raw=True)
else:
return None
def get_cover_url(self):
soup=self.index_to_soup('http://www.focus.pl/magazyn/')
@ -60,7 +71,14 @@ class Focus_pl(BasicNewsRecipe):
self.cover_url='http://www.focus.pl/' + tag.a['href']
return getattr(self, 'cover_url', self.cover_url)
def preprocess_html(self, soup):
self.append_page(soup.body)
return soup
def print_version(self, url):
if url.count ('focus.pl.feedsportal.com'):
u = url.find('focus0Bpl')
u = 'http://www.focus.pl/' + url[u + 11:]
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace ('0E','-')
u = u.replace('/nc/1//story01.htm', '/do-druku/1')
else:
u = url.replace('/nc/1','/do-druku/1')
return u


@ -1,4 +1,3 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GlasgowHerald(BasicNewsRecipe):
@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
language = 'en_GB'
__author__ = 'Kovid Goyal'
use_embedded_content = False
keep_only_tags = [dict(attrs={'class':'article'})]
remove_tags = [
dict(id=['pic-nav']),
dict(attrs={'class':['comments-top']})
]
no_stylesheets = True
auto_cleanup = True
#keep_only_tags = [dict(attrs={'class':'article'})]
#remove_tags = [
#dict(id=['pic-nav']),
#dict(attrs={'class':['comments-top']})
#]
feeds = [
@ -25,5 +28,4 @@ class GlasgowHerald(BasicNewsRecipe):
(u'Arts & Entertainment',
u'http://www.heraldscotland.com/cmlink/1.768',),
(u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]


@ -9,9 +9,9 @@ from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse
import re
class HackerNews(BasicNewsRecipe):
title = 'Hacker News'
__author__ = 'Tom Scholl'
class HNWithCommentsLink(BasicNewsRecipe):
title = 'HN With Comments Link'
__author__ = 'Tom Scholl & David Kerschner'
description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
publisher = 'Y Combinator'
category = 'news, programming, it, technology'
@ -80,6 +80,11 @@ class HackerNews(BasicNewsRecipe):
body = body + comments
return u'<html><title>' + title + u'</title><body>' + body + '</body></html>'
def parse_feeds(self):
a = super(HNWithCommentsLink, self).parse_feeds()
self.hn_articles = a[0].articles
return a
def get_obfuscated_article(self, url):
if url.startswith('http://news.ycombinator.com'):
content = self.get_hn_content(url)
@ -97,6 +102,13 @@ class HackerNews(BasicNewsRecipe):
else:
content = self.get_readable_content(url)
article = 0
for a in self.hn_articles:
if a.url == url:
article = a
content = re.sub(r'</body>\s*</html>\s*$', '', content) + article.summary + '</body></html>'
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(content)
self.temp_files[-1].close()


@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
import urllib, re
class HindustanTimes(BasicNewsRecipe):
title = u'Hindustan Times'
@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
]
def get_article_url(self, article):
'''
HT uses a variant of the feedportal RSS ad display mechanism
'''
try:
s = article.summary
return urllib.unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
url = BasicNewsRecipe.get_article_url(self, article)
res = self.browser.open_novisit(url)
url = res.geturl().split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
'www.'}
for k, v in encoding.iteritems():
url = url.replace(k, v)
return url
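The encoding table above maps feedsportal's obfuscated URL tokens back to plain characters. A minimal standalone sketch of that same substitution step (not part of the recipe; the decode_feedsportal name and the sample slug are purely illustrative):

# Standalone sketch of the slug decoding performed in get_article_url() above.
# The sample slug is hypothetical; real slugs come from the final path
# component of the redirected article URL.
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://',
            '0S': 'www.'}

def decode_feedsportal(slug):
    # Replace each obfuscated token with its plain-text equivalent.
    for token, plain in encoding.items():
        slug = slug.replace(token, plain)
    return slug

print(decode_feedsportal('0L0Sexample0N0Cnews0Estory'))
# -> http://www.example.com/news-story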


@ -1,44 +1,58 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
################################################################################
#Description: http://hvg.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.20. - V1.1
################################################################################
class HVG(BasicNewsRecipe):
title = 'HVG.HU'
__author__ = u'István Papp'
description = u'Friss hírek a HVG-től'
timefmt = ' [%Y. %b. %d., %a.]'
oldest_article = 4
language = 'hu'
from calibre.web.feeds.news import BasicNewsRecipe
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
publisher = 'HVG Online'
category = u'news, hírek, hvg'
extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
remove_tags_before = dict(id='pg-content')
remove_javascript = True
remove_empty_feeds = True
class hvg(BasicNewsRecipe):
title = u'HVG'
__author__ = 'Bigpapa'
language = 'hu'
oldest_article = 5 # Maximum age, in days, of the oldest article to fetch.
max_articles_per_feed = 5 # Maximum number of articles per feed stored in the generated e-book.
no_stylesheets = True
encoding = 'utf8'
extra_css = ' h2 { font:bold 28px} '
feeds = [
(u'Itthon', u'http://hvg.hu/rss/itthon')
,(u'Világ', u'http://hvg.hu/rss/vilag')
,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
,(u'Karrier', u'http://hvg.hu/rss/karrier')
,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
,(u'Kultúra', u'http://hvg.hu/rss/kultura')
,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
,(u'Sport', u'http://hvg.hu/rss/sport')
]
remove_attributes = ['style','font', 'href']
def print_version(self, url):
return url.replace ('#rss', '/print')
keep_only_tags = [
dict(name='div', attrs={'id':['pg-content']})
]
remove_tags = [
dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
dict(name='table', attrs={'class':['banner2', 'monocle']}),
dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
dict(name='h3', attrs={'class':['hthree']}),
dict(name='ul', attrs={'class':['defaultul']}),
dict(name='form', attrs={'id':['commentForm']}),
dict(name='h6', attrs={'class':['hthree']}),
dict(name='h6', attrs={'class':['more2']}),
dict(name='img', attrs={'class':['framed']}),
dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
]
feeds = [
# (u'\xd6sszes', 'http://hvg.hu/rss'),
(u'Itthon', 'http://hvg.hu/rss/itthon'),
(u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
(u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
(u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
(u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
(u'Karrier', 'http://hvg.hu/rss/karrier'),
(u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
(u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
(u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
(u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
(u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
(u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
(u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
(u'Sport', 'http://hvg.hu/rss/sport')
]

BIN recipes/icons/biolog_pl.png Normal file (binary file, not shown; 1.2 KiB)
Four more binary icon image files added (not shown).
BIN recipes/icons/moneynews.png Normal file (binary file, not shown; 914 B)


@ -109,7 +109,6 @@ class TheIndependentNew(BasicNewsRecipe):
picdiv = soup.find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,picdiv['src'])
def preprocess_html(self, soup):
@ -273,12 +272,15 @@ class TheIndependentNew(BasicNewsRecipe):
def _insertRatingStars(self,soup,item):
if item.contents is None:
if item.contents is None or len(item.contents) < 1:
return
rating = item.contents[0]
if not rating.isdigit():
return None
rating = int(item.contents[0])
try:
rating = float(item.contents[0])
except:
print 'Could not convert decimal rating to star: malformatted float.'
return
for i in range(1,6):
star = Tag(soup,'img')
if i <= rating:


@ -0,0 +1,14 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Kosmonauta(BasicNewsRecipe):
title = u'Kosmonauta.net'
__author__ = 'fenuks'
description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
category = 'astronomy'
language = 'pl'
cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/index.php/feed/rss.html')]


@ -10,6 +10,10 @@ __MakePeriodical__ = True
__UseChineseTitle__ = False
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) It is to disable premium content (Default: False)
@ -24,12 +28,15 @@ __Date__ = ''
'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new-day
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on the Kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@ -53,6 +60,7 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
@ -60,11 +68,15 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
@ -109,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -127,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -161,9 +179,9 @@ class MPRecipe(BasicNewsRecipe):
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
# convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@ -186,6 +204,18 @@ class MPRecipe(BasicNewsRecipe):
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
if __Date__ <> '':
return __Date__[6:8]
@ -237,7 +267,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -274,7 +304,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url)
@ -291,7 +321,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
@ -299,7 +329,7 @@ class MPRecipe(BasicNewsRecipe):
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -379,7 +409,7 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
try:
try:
br.open_novisit(url)
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
@ -406,7 +436,7 @@ class MPRecipe(BasicNewsRecipe):
included_urls.append(url)
current_articles.reverse()
return current_articles
# parse from www.mingpaovan.com
def parse_section3(self, url, baseUrl):
self.get_fetchdate()
@ -528,7 +558,7 @@ class MPRecipe(BasicNewsRecipe):
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
else:
# .txt based file
splitter = re.compile(r'\n') # Match non-digits
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
@ -591,23 +621,23 @@ class MPRecipe(BasicNewsRecipe):
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
@ -644,7 +674,7 @@ class MPRecipe(BasicNewsRecipe):
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -653,78 +683,154 @@ class MPRecipe(BasicNewsRecipe):
for item in soup.findAll(stype=True):
del item['absmiddle']
return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# override from the one in version 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use customed date instead of current time
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
@ -739,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -762,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
@ -785,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
@ -799,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)
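The get_fetch* helpers above slice an optional YYYYMMDD override out of __Date__, and the overridden create_opf() turns those pieces into the publication date written to the OPF. A minimal sketch of that date handling, assuming a purely illustrative override value of '20111218' (variable names are mine, not the recipe's):

import datetime

# Sketch only: '20111218' stands in for a user-supplied __Date__ override.
sample_date = '20111218'
year, month, day = sample_date[0:4], sample_date[4:6], sample_date[6:8]

# Date string appended to the book title when not building a periodical.
formatted = '%s-%s-%s' % (year, month, day)   # '2011-12-18'

# Publication date written into the OPF metadata; per the recipe's comment,
# the time apparently needs to be later than noon for the intended pub date
# to show on the Kindle.
pubdate = datetime.datetime(int(year), int(month), int(day), 12, 30, 0)
print(formatted, pubdate)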


@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Toronto'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) It is to disable premium content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
# Override the date returned by the program if specifying a YYYYMMDD below
__Date__ = ''
'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new-day
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on the Kindle device.
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt
]
if __KeepImages__:
remove_tags = [dict(name='style'),
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
# convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
return dt_local
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
if __Date__ <> '':
return __Date__
else:
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
if __Date__ <> '':
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
if __Date__ <> '':
return __Date__[6:8]
else:
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
if __InclPremium__ == True:
articles = self.parse_section2_txt(url, keystr)
else:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
#if ed_articles:
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
#if fin_articles:
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url)
# if articles:
# feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
#if ent_articles:
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
# replace the url with the print-friendly version
if __ParsePFF__ == True:
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
url = re.sub('%2F.*%2F', '/', url)
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
url = url.replace('%2Etxt', '_print.htm')
url = url.replace('%5F', '_')
else:
url = url.replace('.htm', '_print.htm')
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
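# Editorial note on the rewrite above (a sketch inferred from the code; the sample
# fragment is made up): premium links on news.mingpao.com are 'Redirect' URLs with a
# URL-encoded target. The first re.sub drops the duplicated date segment, the second
# collapses the encoded directory part ('%2F...%2F') to a single '/', and the replace
# calls map the encoded '%2Etxt' target to its printer-friendly '_print.htm' page,
# e.g. a target ending in '...%2Fabc%2Fga12345%2Etxt' would come out as '/ga12345_print.htm'.
# Ordinary article links simply have '.htm' swapped for '_print.htm'.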
@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
br = mechanize.Browser()
br.set_handle_redirect(False)
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
try:
br.open_novisit(url)
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
except:
print 'skipping a premium article'
current_articles.reverse()
return current_articles
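# Editorial note (not in the original source): with redirect handling disabled above,
# br.open_novisit(url) raises for premium articles, whose links apparently answer with
# a redirect to the paid-content page, so those entries are dropped here; freely
# readable articles open cleanly and are appended to the feed.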
# parse from text file of life.mingpao.com
def parse_section2_txt(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url):
new_html = raw_html
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
if url.rfind('_print.htm') <> -1:
# javascript based file
splitter = re.compile(r'\n')
new_raw_html = '<html><head><title>Untitled</title></head>'
new_raw_html = new_raw_html + '<body>'
for item in splitter.split(raw_html):
if item.startswith('var heading1 ='):
heading = item.replace('var heading1 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
new_raw_html = new_raw_html + '<div class="heading">' + heading
if item.startswith('var heading2 ='):
heading = item.replace('var heading2 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
if heading <> '':
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
else:
new_raw_html = new_raw_html + '</div>'
if item.startswith('var content ='):
content = item.replace("var content = ", '')
content = content.replace('\'', '')
content = content.replace(';', '')
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
if item.startswith('var photocontent ='):
photo = item.replace('var photocontent = \'', '')
photo = photo.replace('\'', '')
photo = photo.replace(';', '')
photo = photo.replace('<tr>', '')
photo = photo.replace('<td>', '')
photo = photo.replace('</tr>', '')
photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
# .txt based file
splitter = re.compile(r'\n') # split on newlines
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
# if title already reached but break between title and content not yet found, record title_break_reached
if title_started == True and title_break_reached == False and item == '':
title_break_reached = True
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
# start content
elif title_started == True and title_break_reached == True and met_article_start_char == False:
if item <> '':
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
#if item.startswith(u'\u3010'):
# met_article_start_char = True
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith("=@"):
print 'skip movie link'
elif item.startswith("=?"):
next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else:
if next_is_img_txt == False and met_article_start_char == False:
if item <> '':
if title_started == False:
#print 'Title started at ', item
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else:
new_raw_html = new_raw_html + item + '<p>\n'
else:
next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n'
new_html = new_raw_html + '</div></body></html>'
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
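# Editorial summary of the .txt markup handled above (inferred from the parsing code,
# not documented by the source): lines starting with '=@' are movie links and are
# skipped; '=?' introduces a .gif image reference; '==' and '=' introduce .jpg image
# references, with the line that follows a marker emitted as the image caption; the
# first non-empty plain line starts the heading, a blank line ends it, and later
# non-empty lines are emitted as article content paragraphs.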
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
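# Editorial note on the fallback above (an inference from the code, not stated in the
# source): each 'src="...jpg"' is first probed on the server as a .gif of the same
# name and swapped in if the probe succeeds; otherwise the hi-res photo is assumed to
# live under the same name with an extra leading underscore, so a hypothetical
# 'src="ga123.jpg"' would be rewritten to 'src="_ga123.jpg"'.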
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
del item['absmiddle']
return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
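# Editorial note (not in the original source): when __IncludeSummary__ is set and the
# article has no summary yet, the first non-empty paragraph of the body, minus the
# leading u'\u3010\u660e\u5831\u5c08\u8a0a\u3011' tag, becomes the Kindle article
# description; in all other cases the description is replaced by the body's character
# count, rendered in the form u'\uff08' + str(counts) + u'\u5b57\uff09'.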
# override from the one in version 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use the custom date instead of the current time
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

View File

@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
# Region - Hong Kong, Vancouver, Toronto
__Region__ = 'Vancouver'
# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
# please replace the following "True" with "False". (Default: True)
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
# Turn below to True if your device supports display of CJK titles (Default: False)
__UseChineseTitle__ = False
# Set it to False if you want to skip images
# Set it to False if you want to skip images (Default: True)
__KeepImages__ = True
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
__IncludeSummary__ = False
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
__IncludeThumbnails__ = True
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
__UseLife__ = True
# (HK only) Set it to True to include premium (subscription) content (Default: False)
__InclPremium__ = False
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
__ParsePFF__ = True
# (HK only) Turn below to True if you wish hi-res images (Default: False)
__HiResImg__ = False
# Override the date used by the recipe by specifying a YYYYMMDD date below (leave it empty to use the current date)
__Date__ = ''
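# A usage sketch for the switches above (editorial example; the date value is made up):
# leaving __Date__ empty builds today's issue from the adjusted local time, while a
# setting such as
# __Date__ = '20111218'
# makes get_fetchdate(), get_fetchformatteddate(), get_fetchyear(), get_fetchmonth()
# and get_fetchday() all report 18 Dec 2011, so that back issue is fetched and
# labelled instead.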
'''
Change Log:
2011/12/18: update the overridden create_opf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
from create_opf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new-day
download of Hong Kong Ming Pao at 4.30am. Set the actual publication date shown on the Kindle device.
2011/12/01: take care of the situation where, in txt source parsing, the article content does not start with the special character u'\u3010'
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
2011/10/19: fix a bug in txt source parsing
2011/10/17: disable fetching of premium content, also improved txt source parsing
2011/10/04: option to get hi-res photos for the articles
2011/09/21: fetching "column" section is made optional.
2011/09/18: parse "column" section stuff from source text file directly.
2011/09/07: disable "column" section as it is no longer offered free.
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
provide options to remove all images in the file
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
@ -37,30 +60,38 @@ Change Log:
2010/10/31: skip repeated articles in section pages
'''
import os, datetime, re
from calibre.utils.date import now as nowf
import os, datetime, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.localization import canonicalize_lang
# MAIN CLASS
class MPRecipe(BasicNewsRecipe):
if __Region__ == 'Hong Kong':
title = 'Ming Pao - Hong Kong'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u9999\u6e2f)'
else:
title = 'Ming Pao - Hong Kong'
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
category = 'Chinese, News, Hong Kong'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'class':['heading']}), # for heading from txt
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
dict(attrs={'class':['content']}), # for content from txt
dict(attrs={'class':['photo']}),
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}), # images for source from life.mingpao.com
dict(attrs={'class':['images']}) # for images from txt
]
if __KeepImages__:
remove_tags = [dict(name='style'),
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: "</b>")
]
elif __Region__ == 'Vancouver':
title = 'Ming Pao - Vancouver'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
else:
title = 'Ming Pao - Vancouver'
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
category = 'Chinese, News, Vancouver'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
lambda match: ''),
]
elif __Region__ == 'Toronto':
title = 'Ming Pao - Toronto'
if __UseChineseTitle__ == True:
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = 'Ming Pao - Toronto'
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
category = 'Chinese, News, Toronto'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
conversion_options = {'linearize_tables':True}
timefmt = ''
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit, add an additional
# '_' at the front
# not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
# minIdx = i0
# i1 = url.find('1')
# if i1 >= 0 and i1 < minIdx:
# minIdx = i1
# i2 = url.find('2')
# if i2 >= 0 and i2 < minIdx:
# minIdx = i2
# i3 = url.find('3')
# if i3 >= 0 and i0 < minIdx:
# minIdx = i3
# i4 = url.find('4')
# if i4 >= 0 and i4 < minIdx:
# minIdx = i4
# i5 = url.find('5')
# if i5 >= 0 and i5 < minIdx:
# minIdx = i5
# i6 = url.find('6')
# if i6 >= 0 and i6 < minIdx:
# minIdx = i6
# i7 = url.find('7')
# if i7 >= 0 and i7 < minIdx:
# minIdx = i7
# i8 = url.find('8')
# if i8 >= 0 and i8 < minIdx:
# minIdx = i8
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
return url
def get_dtlocal(self):
dt_utc = datetime.datetime.utcnow()
if __Region__ == 'Hong Kong':
# convert UTC to local hk time - at HKT 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
# convert UTC to local hk time - at HKT 4.30am, all news are available
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
elif __Region__ == 'Vancouver':
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
return dt_local
def get_fetchdate(self):
return self.get_dtlocal().strftime("%Y%m%d")
if __Date__ <> '':
return __Date__
else:
return self.get_dtlocal().strftime("%Y%m%d")
def get_fetchformatteddate(self):
return self.get_dtlocal().strftime("%Y-%m-%d")
if __Date__ <> '':
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
else:
return self.get_dtlocal().strftime("%Y-%m-%d")
def get_fetchyear(self):
if __Date__ <> '':
return __Date__[0:4]
else:
return self.get_dtlocal().strftime("%Y")
def get_fetchmonth(self):
if __Date__ <> '':
return __Date__[4:6]
else:
return self.get_dtlocal().strftime("%m")
def get_fetchday(self):
return self.get_dtlocal().strftime("%d")
if __Date__ <> '':
return __Date__[6:8]
else:
return self.get_dtlocal().strftime("%d")
def get_cover_url(self):
if __Region__ == 'Hong Kong':
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
articles = self.parse_section2(url, keystr)
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
if __InclPremium__ == True:
articles = self.parse_section2_txt(url, keystr)
else:
articles = self.parse_section2(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
else:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special- editorial
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
if ed_articles:
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
#if ed_articles:
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
# special - finance
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
#if fin_articles:
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
articles = self.parse_section(url)
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
# articles = self.parse_section(url)
# if articles:
# feeds.append((title, articles))
# special - entertainment
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
if ent_articles:
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
#if ent_articles:
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
if __InclPremium__ == True:
# parse column section articles directly from .txt files
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
]:
articles = self.parse_section2_txt(url, keystr)
if articles:
feeds.append((title, articles))
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
if articles:
feeds.append((title, articles))
# special- columns
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
if col_articles:
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
elif __Region__ == 'Vancouver':
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
# replace the url with the print-friendly version
if __ParsePFF__ == True:
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
url = re.sub('%2F.*%2F', '/', url)
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
url = url.replace('%2Etxt', '_print.htm')
url = url.replace('%5F', '_')
else:
url = url.replace('.htm', '_print.htm')
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):
# parse from life.mingpao.com
def parse_section2(self, url, keystr):
br = mechanize.Browser()
br.set_handle_redirect(False)
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
try:
br.open_novisit(url)
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
except:
print 'skipping a premium article'
current_articles.reverse()
return current_articles
# parse from text file of life.mingpao.com
def parse_section2_txt(self, url, keystr):
self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href=True)
a.reverse()
current_articles = []
included_urls = []
for i in a:
title = self.tag_to_string(i)
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
current_articles.append({'title': title, 'url': url, 'description': ''})
included_urls.append(url)
current_articles.reverse()
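# Editorial note (illustrative; the file path below is made up): the replace above maps
# the interactive page to the raw text source that preprocess_raw_html() understands,
# e.g. 'http://life.mingpao.com/cfm/dailynews3.cfm?File=20111223/nal/ncl1.txt' becomes
# 'http://life.mingpao.com/ftp/Life3/20111223/nal/ncl1.txt'.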
@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
current_articles.reverse()
return current_articles
# preprocess those .txt and javascript based files
def preprocess_raw_html(self, raw_html, url):
new_html = raw_html
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
if url.rfind('_print.htm') <> -1:
# javascript based file
splitter = re.compile(r'\n')
new_raw_html = '<html><head><title>Untitled</title></head>'
new_raw_html = new_raw_html + '<body>'
for item in splitter.split(raw_html):
if item.startswith('var heading1 ='):
heading = item.replace('var heading1 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
new_raw_html = new_raw_html + '<div class="heading">' + heading
if item.startswith('var heading2 ='):
heading = item.replace('var heading2 = \'', '')
heading = heading.replace('\'', '')
heading = heading.replace(';', '')
if heading <> '':
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
else:
new_raw_html = new_raw_html + '</div>'
if item.startswith('var content ='):
content = item.replace("var content = ", '')
content = content.replace('\'', '')
content = content.replace(';', '')
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
if item.startswith('var photocontent ='):
photo = item.replace('var photocontent = \'', '')
photo = photo.replace('\'', '')
photo = photo.replace(';', '')
photo = photo.replace('<tr>', '')
photo = photo.replace('<td>', '')
photo = photo.replace('</tr>', '')
photo = photo.replace('</td>', '<br>')
photo = photo.replace('class="photo"', '')
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
new_html = new_raw_html + '</body></html>'
else:
# .txt based file
splitter = re.compile(r'\n') # split on newlines
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
next_is_img_txt = False
title_started = False
title_break_reached = False
met_article_start_char = False
for item in splitter.split(raw_html):
item = item.strip()
# if title already reached but break between title and content not yet found, record title_break_reached
if title_started == True and title_break_reached == False and item == '':
title_break_reached = True
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
# start content
elif title_started == True and title_break_reached == True and met_article_start_char == False:
if item <> '':
met_article_start_char = True
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
#if item.startswith(u'\u3010'):
# met_article_start_char = True
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
else:
if next_is_img_txt == False:
if item.startswith("=@"):
print 'skip movie link'
elif item.startswith("=?"):
next_is_img_txt = True
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
elif item.startswith('=='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[2:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
elif item.startswith('='):
next_is_img_txt = True
if False:
# TODO: check existence of .gif first
newimg = '_' + item[1:].strip() + '.jpg'
new_raw_html += '<img src="' + newimg + '" /><p>\n'
else:
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
else:
if next_is_img_txt == False and met_article_start_char == False:
if item <> '':
if title_started == False:
#print 'Title started at ', item
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
title_started = True
else:
new_raw_html = new_raw_html + item + '\n'
else:
new_raw_html = new_raw_html + item + '<p>\n'
else:
next_is_img_txt = False
new_raw_html = new_raw_html + item + '\n'
new_html = new_raw_html + '</div></body></html>'
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
if __HiResImg__ == True:
# TODO: add a _ in front of an image url
if url.rfind('news.mingpao.com') > -1:
imglist = re.findall('src="?.*?jpg"', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
for img in imglist:
gifimg = img.replace('jpg"', 'gif"')
try:
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
# find the location of the first _
pos = img.find('_')
if pos > -1:
# if found, insert _ after the first _
newimg = img[0:pos] + '_' + img[pos:]
new_html = new_html.replace(img, newimg)
else:
# if not found, insert _ after "
new_html = new_html.replace(img[1:], '"_' + img[1:])
elif url.rfind('life.mingpao.com') > -1:
imglist = re.findall('src=\'?.*?jpg\'', new_html)
br = mechanize.Browser()
br.set_handle_redirect(False)
#print 'Img list: ', imglist, '\n'
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg\'', 'gif\'')
try:
gifurl = re.sub(r'dailynews.*txt', '', url)
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.rfind('/')
newimg = img[0:pos+1] + '_' + img[pos+1:]
new_html = new_html.replace(img, newimg)
# repeat with src quoted by double quotes, for text parsed from src txt
imglist = re.findall('src="?.*?jpg"', new_html)
for img in imglist:
#print 'Found img: ', img
gifimg = img.replace('jpg"', 'gif"')
try:
#print 'url', url
pos = url.rfind('/')
gifurl = url[:pos+1]
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
new_html = new_html.replace(img, gifimg)
except:
pos = img.find('"')
newimg = img[0:pos+1] + '_' + img[pos+1:]
#print 'Use hi-res img', newimg
new_html = new_html.replace(img, newimg)
return new_html
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
del item['absmiddle']
return soup
def populate_article_metadata(self, article, soup, first):
# thumbnails shouldn't be available if using hi-res images
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
img = soup.find('img')
if img is not None:
self.add_toc_thumbnail(article, img['src'])
try:
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
# look for content
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
if articlebody:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
textFound = False
for p in paras:
if not textFound:
summary_candidate = self.tag_to_string(p).strip()
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
if len(summary_candidate) > 0:
article.summary = article.text_summary = summary_candidate
textFound = True
else:
# display a simple text
#article.summary = article.text_summary = u'\u66f4\u591a......'
# display word counts
counts = 0
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
if not articlebodies:
articlebodies = soup.findAll('div',attrs={'class':'content'})
if not articlebodies:
articlebodies = soup.findAll('div', attrs={'id':'font'})
if articlebodies:
for articlebody in articlebodies:
# the text may or may not be enclosed in <p></p> tag
paras = articlebody.findAll('p')
if not paras:
paras = articlebody
for p in paras:
summary_candidate = self.tag_to_string(p).strip()
counts += len(summary_candidate)
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
except:
self.log("Error creating article descriptions")
return
# override from the one in version 0.8.31
def create_opf(self, feeds, dir=None):
if dir is None:
dir = self.output_dir
if __UseChineseTitle__ == True:
if __Region__ == 'Hong Kong':
title = u'\u660e\u5831 (\u9999\u6e2f)'
elif __Region__ == 'Vancouver':
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
elif __Region__ == 'Toronto':
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
else:
title = self.short_title()
# if not generating a periodical, force date to apply in title
if __MakePeriodical__ == False:
title = self.short_title()
# change 1: allow our own flag to tell if a periodical is to be generated
# also use the custom date instead of the current time
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
title = title + ' ' + self.get_fetchformatteddate()
if True:
mi = MetaInformation(title, [self.publisher])
mi.publisher = self.publisher
mi.author_sort = self.publisher
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.timestamp = nowf()
mi.timestamp = self.get_dtlocal()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.pubdate = nowf()
mi.pubdate = self.get_dtlocal()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
# end of change 1
# change 2: __appname__ replaced by newspaper publisher
__appname__ = self.publisher
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
if __MakePeriodical__ == True:
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
else:
mi.publication_type = self.publication_type+':'+self.short_title()
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
# change 4: in the following, all the nowf() are changed to adjusted time
# This one doesn't matter
mi.timestamp = nowf()
# change 5: skip listing the articles
#article_titles, aseen = [], set()
#for f in feeds:
# for a in f:
# if a.title and a.title not in aseen:
# aseen.add(a.title)
# article_titles.append(force_unicode(a.title, 'utf-8'))
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
#mi.comments = self.description
#if not isinstance(mi.comments, unicode):
# mi.comments = mi.comments.decode('utf-8', 'replace')
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
# '\n\n'.join(article_titles))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
# This one affects the pub date shown in kindle title
#mi.pubdate = nowf()
# now appears to need the time field to be > 12.00noon as well
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
desc = None
else:
desc = self.description_limiter(desc)
tt = a.toc_thumbnail if a.toc_thumbnail else None
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
parent.add_item('%sindex.html'%adir, None,
a.title if a.title else _('Untitled Article'),
play_order=po, author=auth,
description=desc, toc_thumbnail=tt)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)

View File

@ -0,0 +1,15 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class Mlody_technik(BasicNewsRecipe):
title = u'Mlody technik'
__author__ = 'fenuks'
description = u'Młody technik'
category = 'science'
language = 'pl'
cover_url='http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
no_stylesheets = True
oldest_article = 7
max_articles_per_feed = 100
#keep_only_tags=[dict(id='container')]
feeds = [(u'Artyku\u0142y', u'http://www.mt.com.pl/feed')]

View File

@ -1,9 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
moneynews.newsmax.com
www.moneynews.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
@ -12,40 +10,40 @@ class MoneyNews(BasicNewsRecipe):
title = 'Moneynews.com'
__author__ = 'Darko Miletic'
description = 'Financial news worldwide'
publisher = 'moneynews.com'
language = 'en'
publisher = 'Newsmax.com'
language = 'en'
category = 'news, finances, USA, business'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
encoding = 'utf8'
extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
feeds = [
(u'Street Talk' , u'http://moneynews.newsmax.com/xml/streettalk.xml' )
,(u'Finance News' , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
,(u'Economy' , u'http://moneynews.newsmax.com/xml/economy.xml' )
,(u'Companies' , u'http://moneynews.newsmax.com/xml/companies.xml' )
,(u'Markets' , u'http://moneynews.newsmax.com/xml/Markets.xml' )
,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml' )
(u'Street Talk' , u'http://www.moneynews.com/rss/StreetTalk/8.xml' )
,(u'Finance News' , u'http://www.moneynews.com/rss/FinanceNews/4.xml' )
,(u'Economy' , u'http://www.moneynews.com/rss/Economy/2.xml' )
,(u'Companies' , u'http://www.moneynews.com/rss/Companies/6.xml' )
,(u'Markets' , u'http://www.moneynews.com/rss/Markets/7.xml' )
,(u'Investing & Analysis' , u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml')
]
keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
keep_only_tags = [dict(name='div', attrs={'class':'copy'})]
remove_tags = [
dict(name='td' , attrs={'id':'article_fontsize'})
,dict(name='table', attrs={'id':'toolbox' })
,dict(name='tr' , attrs={'id':'noprint3' })
dict(attrs={'class':['MsoNormal', 'MsoNoSpacing']}),
dict(name=['object','link','embed','form','meta'])
]
def print_version(self, url):
nodeid = url.rpartition('/')[2]
return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid

View File

@ -7,6 +7,7 @@ class naczytniki(BasicNewsRecipe):
language = 'pl'
description ='everything about e-readers'
category='readers'
no_stylesheets=True
oldest_article = 7
max_articles_per_feed = 100
remove_tags_after= dict(name='div', attrs={'class':'sociable'})

54
recipes/nol.recipe Normal file
View File

@ -0,0 +1,54 @@
################################################################################
#Description: http://nol.hu/ RSS channel
#Author: Bigpapa (bigpapabig@hotmail.com)
#Date: 2011.12.18. - V1.1
################################################################################
from calibre.web.feeds.recipes import BasicNewsRecipe
class NOL(BasicNewsRecipe):
title = u'NOL'
__author__ = 'Bigpapa'
oldest_article = 5
max_articles_per_feed = 5 # Maximum number of articles per feed to keep in the generated e-book.
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
language = 'hu'
publication_type = 'newsportal'
conversion_options ={
'linearize_tables' : True,
}
keep_only_tags = [
dict(name='table', attrs={'class':['article-box']})
]
remove_tags = [
dict(name='div', attrs={'class':['h','ad-container-outer','tags noborder','ad-container-inner','image-container-lead','tags','related-container']}),
dict(name='h4'),
dict(name='tfoot'),
dict(name='td', attrs={'class':['foot']}),
dict(name='span', attrs={'class':['image-container-caption']}),
]
feeds = [
# (u'V\xe1logat\xe1s', 'http://nol.hu/feed/valogatas.rss'),
(u'Belf\xf6ld', 'http://nol.hu/feed/belfold.rss'),
(u'K\xfclf\xf6ld', 'http://nol.hu/feed/kulfold.rss'),
(u'Gazdas\xe1g', 'http://nol.hu/feed/gazdasag.rss'),
(u'V\xe9lem\xe9ny', 'http://nol.hu/feed/velemeny.rss'),
(u'Kult\xfara', 'http://nol.hu/feed/kult.rss'),
(u'Tud/Tech', 'http://nol.hu/feed/tud-tech.rss'),
(u'Sport', 'http://nol.hu/feed/sport.rss'),
(u'Noller', 'http://nol.hu/feed/noller.rss'),
(u'Mozaik', 'http://nol.hu/feed/mozaik.rss'),
(u'Utaz\xe1s', 'http://nol.hu/feed/utazas.rss'),
(u'Aut\xf3', 'http://nol.hu/feed/auto.rss'),
(u'Voks', 'http://nol.hu/feed/voks.rss'),
]

View File

@ -1,20 +1,21 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Nowa_Fantastyka(BasicNewsRecipe):
title = u'Nowa Fantastyka'
oldest_article = 7
__author__ = 'fenuks'
language = 'pl'
encoding='latin2'
description ='site for fantasy readers'
category='fantasy'
max_articles_per_feed = 100
INDEX='http://www.fantastyka.pl/'
no_stylesheets=True
needs_subscription = 'optional'
remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
#remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
remove_tags=[dict(attrs={'class':'avatar2'})]
feeds = []
remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
def find_articles(self, url):
articles = []
@ -45,3 +46,13 @@ class Nowa_Fantastyka(BasicNewsRecipe):
cover=soup.find(name='img', attrs={'class':'okladka'})
self.cover_url=self.INDEX+ cover['src']
return getattr(self, 'cover_url', self.cover_url)
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.fantastyka.pl/')
br.select_form(nr=0)
br['login'] = self.username
br['pass'] = self.password
br.submit()
return br

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
__license__ = 'GPL v3'
'''
calibre recipe for prospectmagazine.co.uk (subscription)
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class ProspectMagUK(BasicNewsRecipe):
title = u'Prospect Magazine'
description = 'A general-interest publication offering analysis and commentary about politics, news and business.'
__author__ = 'barty, duluoz'
timefmt = ' [%d %B %Y]'
no_stylesheets = True
publication_type = 'magazine'
masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
category = 'news, UK'
language = 'en_GB'
max_articles_per_feed = 100
auto_cleanup = True
needs_subscription = True
auto_cleanup_keep = '//div[@class="lead_image"]'
remove_tags = [{'class':['shareinpost','postutils','postinfo']}]
INDEX = 'http://www.prospectmagazine.co.uk/current-issue'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.prospectmagazine.co.uk/wp-login.php')
br.select_form(name='loginform')
br['log'] = self.username
br['pwd'] = self.password
br.submit()
return br
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
#div = soup.find('h1',text=re.compile(r'Issue \d+'))
#fname = self.tag_to_string( div) if div is not None else 'Current Issue'
div = soup.find('div', id='cover_image')
if div is not None:
img = div.find('img', src=True)
if img is not None:
src = img['src']
if src.startswith('/'):
src = 'http://www.prospectmagazine.co.uk' + src
self.cover_url = src
feeds = []
# loop through sections
for sect in soup.findAll('div',attrs={'class':'sectionheading'}):
fname = self.tag_to_string( sect).replace('>','').strip()
self.log('Found section', fname)
articles = []
# note: can't just find siblings with class='post' because that will also
# grab all the articles belonging to the sections that follow.
for item in sect.findNextSiblings('div',attrs={'class':True}):
if not 'post' in item['class']: break
a = item.find('a', href=True)
if a is None: continue
url = a['href']
title = self.tag_to_string(a)
p = item.find('p')
desc = self.tag_to_string( p) if p is not None else ''
art = {'title':title, 'description':desc,'date':' ', 'url':url}
p = item.find(attrs={'class':re.compile('author')})
self.log('\tFound article:', title, '::', url)
if p is not None:
art['author'] = self.tag_to_string( p).strip()
articles.append(art)
feeds.append((fname, articles))
return feeds

View File

@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe
class spn(BasicNewsRecipe):
title = u'Salonica Press News'
language = 'gr'
__author__ = "SteliosGero"
oldest_article = 3
max_articles_per_feed = 100
auto_cleanup = True
category = 'news, GR'
language = 'el'
feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&amp;type=rss'), (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&amp;type=rss'), (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&amp;type=rss'), (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&amp;type=rss'), (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&amp;type=rss'), (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&amp;type=rss'), (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&amp;type=rss'), (u'Media', u'http://www.spnews.gr/media-news?format=feed&amp;type=rss'), (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&amp;type=rss'), (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&amp;type=rss'), (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&amp;type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&amp;type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&amp;type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&amp;type=rss'), (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&amp;type=rss'), (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&amp;type=rss'), (u'Alert!', u'http://spnews.gr/alert?format=feed&amp;type=rss')]
def print_version(self, url):
return url+'?tmpl=component&print=1&layout=default&page='

View File

@ -8,8 +8,8 @@ class SpidersWeb(BasicNewsRecipe):
cover_url = 'http://www.spidersweb.pl/wp-content/themes/spiderweb/img/Logo.jpg'
category = 'IT, WEB'
language = 'pl'
no_stylesheets = True
max_articles_per_feed = 100
remove_tags_before=dict(name="h1", attrs={'class':'Title'})
remove_tags_after=dict(name="div", attrs={'class':'Text'})
remove_tags=[dict(name='div', attrs={'class':['Tags', 'CommentCount FloatL', 'Show FloatL']})]
keep_only_tags=[dict(id='Post')]
remove_tags=[dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']})]
feeds = [(u'Wpisy', u'http://www.spidersweb.pl/feed')]

View File

@ -6,92 +6,49 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch sueddeutsche.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe):
title = u'sueddeutsche.de'
description = 'News from Germany'
__author__ = 'Oliver Niesner and Armin Geller' #AGe 2011-12-16
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2011-12-16
use_embedded_content = False
timefmt = ' [%d %b %Y]'
oldest_article = 7
max_articles_per_feed = 50
no_stylesheets = True
language = 'de'
auto_cleanup = True
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1236175.1323967473!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
# 2011-12-16 AGe
# remove_tags = [ dict(name='link'), dict(name='iframe'),
# dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
# "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
#
# dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
# "pages closed","basebox right narrow","headslot galleried"]}),
#
# dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
# "item","videoBigButton","articlefooter full-column",
# "bildbanderolle full-column","footerCopy padleft5"]}),
#
# dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
# dict(name='div', attrs={'style':["position:relative;"]}),
# dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
# dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
# dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
# dict(name='td', attrs={'class':["artikelDruckenRight"]}),
# dict(name='p', text = "ANZEIGE")
# ]
# remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]
#
extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
a{font-family:Arial,Helvetica,sans-serif; font-style:italic;}
.dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
.artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
body{font-family:Arial,Helvetica,sans-serif; }
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
#
auto_cleanup = True
cover_url = 'http://polpix.sueddeutsche.com/polopoly_fs/1.1237395.1324054345!/image/image.jpg_gen/derivatives/860x860/image.jpg' # 2011-12-16 AGe
feeds = [
# (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'), # AGe 2011-11-13
# (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'), #AGe 2011-12-16 deactivated
# (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
# (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only #AGe 2011-12-16 deactivated
(u'Politik', u'http://www.sueddeutsche.de/app/service/rss/ressort/politik/rss.xml'),
(u'Wirtschaft', u'http://www.sueddeutsche.de/app/service/rss/ressort/wirtschaft/rss.xml'),
(u'Geld', u'http://www.sueddeutsche.de/app/service/rss/ressort/finanzen/rss.xml'),
(u'Kultur', u'http://www.sueddeutsche.de/app/service/rss/ressort/kultur/rss.xml'),
(u'Sport', u'http://www.sueddeutsche.de/app/service/rss/ressort/sport/rss.xml'),
(u'Leben', u'http://www.sueddeutsche.de/app/service/rss/ressort/leben/rss.xml'),
(u'Karriere', u'http://www.sueddeutsche.de/app/service/rss/ressort/karriere/rss.xml'),
(u'München & Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
(u'Bayern', u'http://www.sueddeutsche.de/app/service/rss/ressort/bayern/rss.xml'),
(u'Medien', u'http://www.sueddeutsche.de/app/service/rss/ressort/medien/rss.xml'),
(u'Digital', u'http://www.sueddeutsche.de/app/service/rss/ressort/computerwissen/rss.xml'),
(u'Auto', u'http://www.sueddeutsche.de/app/service/rss/ressort/autoreise/rss.xml'),
(u'Wissen', u'http://www.sueddeutsche.de/app/service/rss/ressort/wissen/rss.xml'),
(u'Panorama', u'http://www.sueddeutsche.de/app/service/rss/ressort/panorama/rss.xml'),
(u'Reise', u'http://www.sueddeutsche.de/app/service/rss/ressort/reise/rss.xml'),
(u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
(u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
(u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
(u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
(u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
(u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
(u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
(u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
(u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
(u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
(u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
(u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'), # sometimes only
(u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'), # sometimes only
(u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
(u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'), # sometimes only
]
# def print_version(self, url): #AGe 2011-12-16 deactivated
# main, sep, id = url.rpartition('/') #AGe 2011-12-16 deactivated
# return main + '/2.220/' + id #AGe 2011-12-16 deactivated
# AGe 2011-12-16: Redirect handling solved using the re-usable recipe code from kiklop74.
# Feed is: http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss
# Article download source is: http://sz.de/1.1237295 (Ski Alpin: Der Erfolg kommt, der Trainer geht)
# Article source is: http://www.sueddeutsche.de/sport/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
# Article printversion is: http://www.sueddeutsche.de/sport/2.220/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295
def print_version(self, url):
n_url=self.browser.open_novisit(url).geturl()
main, sep, id = n_url.rpartition('/')
return main + '/2.220/' + id
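
A quick worked illustration of the rewrite in print_version() above, using the article URL documented in the comments. In the recipe the redirect from the sz.de short link is resolved by browser.open_novisit(); here the already-resolved URL is simply given as a string:

n_url = 'http://www.sueddeutsche.de/sport/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295'
main, sep, id = n_url.rpartition('/')
print(main + '/2.220/' + id)
# -> http://www.sueddeutsche.de/sport/2.220/ski-alpin-der-erfolg-kommt-der-trainer-geht-1.1237295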

View File

@ -1,4 +1,4 @@
import re
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
class TimesOfIndia(BasicNewsRecipe):
@ -17,7 +17,9 @@ class TimesOfIndia(BasicNewsRecipe):
]
remove_tags = [
{'class':re.compile('tabsintbgshow|prvnxtbg')},
{'id':['fbrecommend', 'relmaindiv']}
{'id':['fbrecommend', 'relmaindiv', 'shretxt', 'fbrecos', 'twtdiv',
'gpls', 'auim']},
{'class':['twitter-share-button', 'cmtmn']},
]
feeds = [
@ -46,25 +48,27 @@ class TimesOfIndia(BasicNewsRecipe):
]
def get_article_url(self, article):
# Times of India sometimes serves an ad page instead of the article,
# this code detects and circumvents that
url = BasicNewsRecipe.get_article_url(self, article)
if '/0Ltimesofindia' in url:
url = url.partition('/0L')[-1]
url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
'/').replace('0E', '-')
url = 'http://' + url.rpartition('/')[0]
match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
if match is not None:
num = match.group(1)
num = re.sub(r'[^0-9]', '', num)
return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
num)
else:
cms = re.search(r'/(\d+)\.cms', url)
if cms is not None:
return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
cms.group(1))
try:
s = article.summary
return urllib.unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
link = article.get('link', None)
if link and link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
for k, v in encoding.iteritems():
link = link.replace(k, v)
return link
return url
def print_version(self, url):
return url + '?prtpage=1'
def preprocess_html(self, soup, *args):
byl = soup.find(attrs={'class':'byline'})
if byl is not None:
for l in byl.findAll('label'):
l.extract()
return soup
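
As a side note, the feed-link decoding table in get_article_url() above can be exercised on its own. The link below is a made-up feedsportal-style URL, constructed only to illustrate the substitution step; real feed items may differ:

encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
            '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
link = ('http://timesofindia.feedsportal.com/c/3039/f/1/s/2/'
        '0Ltimesofindia0Bindiatimes0N0Carticleshow0C123450Bcms/story01.htm')
if link.split('/')[-1] == 'story01.htm':
    segment = link.split('/')[-2]
    for k, v in encoding.items():
        segment = segment.replace(k, v)
    print(segment)  # -> http://timesofindia.indiatimes.com/articleshow/12345.cms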

17
recipes/tuttojove.recipe Normal file
View File

@ -0,0 +1,17 @@
__license__ = 'GPL v3'
__author__ = 'faber1971'
description = 'Italian website on Juventus F.C. - v1.00 (17 December 2011)'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305984536(BasicNewsRecipe):
title = u'tuttojuve'
description = 'Juventus'
language = 'it'
__author__ = 'faber1971'
oldest_article = 1
max_articles_per_feed = 100
feeds = [(u'notizie', u'http://feeds.tuttojuve.com/rss/'), (u'da vinovo', u'http://feeds.tuttojuve.com/rss/?c=10'), (u'primo piano', u'http://feeds.tuttojuve.com/rss/?c=16'), (u'editoriale', u'http://feeds.tuttojuve.com/rss/?c=3'), (u'il punto', u'http://feeds.tuttojuve.com/rss/?c=8'), (u'pagelle', u'http://feeds.tuttojuve.com/rss/?c=9'), (u'avversario', u'http://feeds.tuttojuve.com/rss/?c=11')]
def print_version(self, url):
return self.browser.open_novisit(url).geturl()

View File

@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2011-11-26 08:48+0000\n"
"PO-Revision-Date: 2011-12-17 09:29+0000\n"
"Last-Translator: Jellby <Unknown>\n"
"Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2011-11-27 05:24+0000\n"
"X-Generator: Launchpad (build 14381)\n"
"X-Launchpad-Export-Date: 2011-12-18 04:37+0000\n"
"X-Generator: Launchpad (build 14525)\n"
#. name for aaa
msgid "Ghotuo"
@ -4963,7 +4963,7 @@ msgstr "Catawba"
#. name for chd
msgid "Chontal; Highland Oaxaca"
msgstr ""
msgstr "Chontal oaxaqueño de las tierras altas"
#. name for che
msgid "Chechen"
@ -4971,7 +4971,7 @@ msgstr "Checheno"
#. name for chf
msgid "Chontal; Tabasco"
msgstr ""
msgstr "Chontal de tabasco"
#. name for chg
msgid "Chagatai"
@ -4983,7 +4983,7 @@ msgstr "Chinook"
#. name for chj
msgid "Chinantec; Ojitlán"
msgstr ""
msgstr "Chinanteco de Ojitlán"
#. name for chk
msgid "Chuukese"
@ -5011,7 +5011,7 @@ msgstr "Chipewyan"
#. name for chq
msgid "Chinantec; Quiotepec"
msgstr ""
msgstr "Chinanteco de Quiotepec"
#. name for chr
msgid "Cherokee"
@ -5043,7 +5043,7 @@ msgstr "Cheyenne"
#. name for chz
msgid "Chinantec; Ozumacín"
msgstr ""
msgstr "Chinanteco de Ozumacín"
#. name for cia
msgid "Cia-Cia"
@ -5051,7 +5051,7 @@ msgstr "Cia-cia"
#. name for cib
msgid "Gbe; Ci"
msgstr ""
msgstr "Cigbe"
#. name for cic
msgid "Chickasaw"
@ -5215,7 +5215,7 @@ msgstr "Arameo moderno caldeo"
#. name for cle
msgid "Chinantec; Lealao"
msgstr ""
msgstr "Chinanteco de Lealao"
#. name for clh
msgid "Chilisso"
@ -5239,7 +5239,7 @@ msgstr "Clallam"
#. name for clo
msgid "Chontal; Lowland Oaxaca"
msgstr ""
msgstr "Chontal oaxaqueño de las tieras bajas"
#. name for clu
msgid "Caluyanun"
@ -5251,7 +5251,7 @@ msgstr "Chulym"
#. name for cly
msgid "Chatino; Eastern Highland"
msgstr ""
msgstr "Chatino Lachao-Yolotepec"
#. name for cma
msgid "Maa"
@ -5327,7 +5327,7 @@ msgstr ""
#. name for cnl
msgid "Chinantec; Lalana"
msgstr ""
msgstr "Chinanteco de Lalana"
#. name for cno
msgid "Con"
@ -5339,7 +5339,7 @@ msgstr "Asmat central"
#. name for cnt
msgid "Chinantec; Tepetotutla"
msgstr ""
msgstr "Chinanteco de Tepetotutla"
#. name for cnu
msgid "Chenoua"
@ -5355,7 +5355,7 @@ msgstr "Córnico medio"
#. name for coa
msgid "Malay; Cocos Islands"
msgstr ""
msgstr "Malayo de las Islas Cocos"
#. name for cob
msgid "Chicomuceltec"
@ -5391,7 +5391,7 @@ msgstr "Cochimi"
#. name for cok
msgid "Cora; Santa Teresa"
msgstr ""
msgstr "Cora de Santa Teresa"
#. name for col
msgid "Columbia-Wenatchi"
@ -5455,7 +5455,7 @@ msgstr "Chocho"
#. name for cpa
msgid "Chinantec; Palantla"
msgstr ""
msgstr "Chinanteco de Palantla"
#. name for cpb
msgid "Ashéninka; Ucayali-Yurúa"
@ -5599,7 +5599,7 @@ msgstr "Cruzeño"
#. name for csa
msgid "Chinantec; Chiltepec"
msgstr ""
msgstr "Chinanteco de Chiltepec"
#. name for csb
msgid "Kashubian"
@ -5651,7 +5651,7 @@ msgstr "Lengua de signos colombiana"
#. name for cso
msgid "Chinantec; Sochiapan"
msgstr ""
msgstr "Chinanteco de Sochiapan"
#. name for csq
msgid "Croatia Sign Language"
@ -5683,7 +5683,7 @@ msgstr "Coos"
#. name for cta
msgid "Chatino; Tataltepec"
msgstr ""
msgstr "Chatino de Tataltepec"
#. name for ctc
msgid "Chetco"
@ -5695,7 +5695,7 @@ msgstr ""
#. name for cte
msgid "Chinantec; Tepinapa"
msgstr ""
msgstr "Chinanteco de Tepinapa"
#. name for ctg
msgid "Chittagonian"
@ -5703,7 +5703,7 @@ msgstr "Chitagoniano"
#. name for ctl
msgid "Chinantec; Tlacoatzintepec"
msgstr ""
msgstr "Chinanteco de Tlacoatzintepec"
#. name for ctm
msgid "Chitimacha"
@ -5719,7 +5719,7 @@ msgstr "Emberá-catío"
#. name for ctp
msgid "Chatino; Western Highland"
msgstr ""
msgstr "Chatino"
#. name for cts
msgid "Bicolano; Northern Catanduanes"
@ -5735,7 +5735,7 @@ msgstr "Chol"
#. name for ctz
msgid "Chatino; Zacatepec"
msgstr ""
msgstr "Chatino de Zacatepec"
#. name for cua
msgid "Cua"
@ -5747,7 +5747,7 @@ msgstr "Cubeo"
#. name for cuc
msgid "Chinantec; Usila"
msgstr ""
msgstr "Chinanteco de Usila"
#. name for cug
msgid "Cung"
@ -5819,7 +5819,7 @@ msgstr "Chug"
#. name for cvn
msgid "Chinantec; Valle Nacional"
msgstr ""
msgstr "Chinanteco de Valle Nacional"
#. name for cwa
msgid "Kabwa"
@ -5847,7 +5847,7 @@ msgstr "Kuwaataay"
#. name for cya
msgid "Chatino; Nopala"
msgstr ""
msgstr "Chatino de Nopala"
#. name for cyb
msgid "Cayubaba"
@ -5871,7 +5871,7 @@ msgstr "Knaanic"
#. name for czn
msgid "Chatino; Zenzontepec"
msgstr ""
msgstr "Chatino de Zenzontepec"
#. name for czo
msgid "Chinese; Min Zhong"
@ -10603,7 +10603,7 @@ msgstr "Javanés"
#. name for jax
msgid "Malay; Jambi"
msgstr ""
msgstr "Malayo de Jambi"
#. name for jay
msgid "Yan-nhangu"
@ -14743,7 +14743,7 @@ msgstr "Lorung meridional"
#. name for lrt
msgid "Malay; Larantuka"
msgstr ""
msgstr "Malayo de Larantuka"
#. name for lrv
msgid "Larevat"
@ -15099,7 +15099,7 @@ msgstr ""
#. name for max
msgid "Malay; North Moluccan"
msgstr ""
msgstr "Malayo de las Molucas septentrional"
#. name for maz
msgid "Mazahua; Central"
@ -15127,7 +15127,7 @@ msgstr ""
#. name for mbf
msgid "Malay; Baba"
msgstr ""
msgstr "Malayo baba"
#. name for mbh
msgid "Mangseng"
@ -15467,7 +15467,7 @@ msgstr "Mende (Sierra Leona)"
#. name for meo
msgid "Malay; Kedah"
msgstr ""
msgstr "Malayo de Kedah"
#. name for mep
msgid "Miriwung"
@ -15511,7 +15511,7 @@ msgstr ""
#. name for mfa
msgid "Malay; Pattani"
msgstr ""
msgstr "Malayo de Pattani"
#. name for mfb
msgid "Bangka"
@ -15571,7 +15571,7 @@ msgstr ""
#. name for mfp
msgid "Malay; Makassar"
msgstr ""
msgstr "Malayo de Macasar"
#. name for mfq
msgid "Moba"
@ -16059,7 +16059,7 @@ msgstr ""
#. name for mkn
msgid "Malay; Kupang"
msgstr ""
msgstr "Malayo de Kupang"
#. name for mko
msgid "Mingang Doso"
@ -16207,7 +16207,7 @@ msgstr ""
#. name for mlz
msgid "Malaynon"
msgstr ""
msgstr "Malaynón"
#. name for mma
msgid "Mama"
@ -16623,7 +16623,7 @@ msgstr ""
#. name for mqg
msgid "Malay; Kota Bangun Kutai"
msgstr ""
msgstr "Malayo kutai de Kota Bangun"
#. name for mqh
msgid "Mixtec; Tlazoyaltepec"
@ -16839,7 +16839,7 @@ msgstr "Malgache masikoro"
#. name for msi
msgid "Malay; Sabah"
msgstr ""
msgstr "Malayo de Sabah"
#. name for msj
msgid "Ma (Democratic Republic of Congo)"
@ -22607,7 +22607,7 @@ msgstr ""
#. name for sci
msgid "Creole Malay; Sri Lankan"
msgstr ""
msgstr "Malo criollo de Sri Lanka"
#. name for sck
msgid "Sadri"
@ -27187,7 +27187,7 @@ msgstr ""
#. name for vkt
msgid "Malay; Tenggarong Kutai"
msgstr ""
msgstr "Malayo kutai de Tenggarong"
#. name for vku
msgid "Kurrama"
@ -28395,7 +28395,7 @@ msgstr "Edomita"
#. name for xdy
msgid "Dayak; Malayic"
msgstr ""
msgstr "Dayak malayo"
#. name for xeb
msgid "Eblan"
@ -28727,7 +28727,7 @@ msgstr "Lengua de signos malasia"
#. name for xmm
msgid "Malay; Manado"
msgstr ""
msgstr "Malayo de Manado"
#. name for xmn
msgid "Persian; Manichaean Middle"

View File

@ -233,7 +233,7 @@ class GetTranslations(Translations): # {{{
if self.modified_translations:
subprocess.check_call(['bzr', 'commit', '-m',
'IGN:Updated translations', self.PATH])
'IGN:Updated translations'])
else:
print('No updated translations available')

View File

@ -527,7 +527,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252',
ent = match.group(1)
if ent in exceptions:
return '&'+ent+';'
if ent == 'apos':
if ent in {'apos', 'squot'}: # squot is generated by some broken CMS software
return check("'")
if ent == 'hellips':
ent = 'hellip'
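
A minimal sketch of what the new branch buys, assuming entity_to_unicode() is used as a re.sub() callback on named entities; the pattern and import path below are illustrative, not necessarily the exact ones calibre uses internally:

import re
from calibre import entity_to_unicode  # assumed import path

pat = re.compile(r'&(\w+);')
print(pat.sub(entity_to_unicode, 'Bob&squot;s &apos;book&apos;'))
# -> Bob's 'book'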

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 31)
numeric_version = (0, 8, 32)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -48,7 +48,8 @@ class ANDROID(USBMS):
0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
0x70c6 : [0x226]
0x70c6 : [0x226],
0x4316 : [0x216],
},
# Freescale
0x15a2 : {
@ -87,6 +88,7 @@ class ANDROID(USBMS):
0x6877 : [0x0400],
0x689e : [0x0400],
0xdeed : [0x0222],
0x1234 : [0x0400],
},
# Viewsonic/Vizio
@ -170,13 +172,14 @@ class ANDROID(USBMS):
'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107']
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
'__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F']
'A1-07___C0541A4F', 'XT912']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -170,8 +170,8 @@ class ODYSSEY(N516):
description = _('Communicate with the Cybook Odyssey eBook reader.')
BCD = [0x316]
VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
VENDOR_NAME = ['LINUX', 'BOOKEEN']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['FILE-STOR_GADGET', 'FLASH_DISK']
FORMATS = ['epub', 'fb2', 'html', 'pdf', 'txt']

View File

@ -266,12 +266,14 @@ class PRST1(USBMS):
collections = booklist.get_collections(collections_attributes)
with closing(sqlite.connect(dbpath)) as connection:
self.update_device_books(connection, booklist, source_id, plugboard)
self.update_device_books(connection, booklist, source_id,
plugboard, dbpath)
self.update_device_collections(connection, booklist, collections, source_id)
debug_print('PRST1: finished update_device_database')
def update_device_books(self, connection, booklist, source_id, plugboard):
def update_device_books(self, connection, booklist, source_id, plugboard,
dbpath):
opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
@ -284,12 +286,12 @@ class PRST1(USBMS):
query = 'SELECT file_path, _id FROM books'
cursor.execute(query)
except DatabaseError:
raise DeviceError('The SONY database is corrupted. '
raise DeviceError(('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect '
' reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that '
' deleting this file may cause your reader to forget '
' any notes/highlights, etc.')
' deleting this file will cause your reader to forget '
' any notes/highlights, etc.')%dbpath)
db_books = {}
for i, row in enumerate(cursor):

View File

@ -276,11 +276,11 @@ OptionRecommendation(name='duplicate_links_in_toc',
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
r"re:test(., 'chapter|book|section|part|prologue|epilogue\s+', 'i')) or @class "
r"re:test(., '\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '
'"chapter","book","section" or "part" as chapter titles as '
'"chapter","book","section", "prologue", "epilogue", or "part" as chapter titles as '
'well as any tags that have class="chapter". The expression '
'used must evaluate to a list of elements. To disable chapter '
'detection, use the expression "/". See the XPath Tutorial '

View File

@ -35,7 +35,7 @@ def initialize_container(path_to_container, opf_name='metadata.opf',
'''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
zf = ZipFile(path_to_container, 'w')
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
zf.writestr('META-INF/', '', 0700)
zf.writestr('META-INF/', '', 0755)
zf.writestr('META-INF/container.xml', CONTAINER)
for path, _, data in extra_entries:
zf.writestr(path, data)

View File

@ -1136,7 +1136,8 @@ class BinaryRecord(object): # {{{
self.raw = record.raw
sig = self.raw[:4]
name = '%06d'%idx
if sig in (b'FCIS', b'FLIS', b'SRCS', b'DATP'):
if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
b'FDST', b'AUDI', b'VIDE',}:
name += '-' + sig.decode('ascii')
elif sig == b'\xe9\x8e\r\n':
name += '-' + 'EOF'

View File

@ -325,6 +325,7 @@ class MobiMLizer(object):
elem.text = None
elem.set('id', id_)
elem.tail = tail
elem.tag = XHTML('a')
else:
return
tag = barename(elem.tag)

View File

@ -502,6 +502,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('> <', '>\n<')
self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html)
self.processed_html = re.sub(r'<(/?)o:p', r'<\1p', self.processed_html)
# Swap inline and block level elements, and order block level elements according to priority
# - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', '\g<para>'+'\g<styletags>', self.processed_html)
@ -974,7 +975,7 @@ class MobiReader(object):
processed_records.append(i)
data = self.sections[i][0]
if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
b'RESC', b'BOUN', b'FDST', b'DATP'}:
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
# A FLIS, FCIS, SRCS or EOF record, ignore
continue
buf = cStringIO.StringIO(data)

View File

@ -942,7 +942,13 @@ class Manifest(object):
if isinstance(data, etree._Element):
ans = xml2str(data, pretty_print=self.oeb.pretty_print)
if self.media_type in OEB_DOCS:
ans = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2></\1>', ans)
# Convert self closing div|span|a tags to normally closed
# ones, as they are interpreted incorrectly by some browser
# based renderers
ans = re.sub(
# tag name followed by either a space or a /
r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>',
r'<\g<tag>\g<arg>></\g<tag>>', ans)
return ans
if isinstance(data, unicode):
return data.encode('utf-8')
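
A small, illustrative check of the tightened pattern above: the lookahead keeps the rewrite limited to real <div>, <a> and <span> tags, whereas the old r'<(div|a|span)([^>]*)/>' would also have swallowed <audio .../> (the self-closing audio bug noted in the changelog):

import re

pat = re.compile(r'<(?P<tag>div|a|span)(?=[\s/])(?P<arg>[^>]*)/>')
markup = '<div class="x"/><audio src="a.mp3"/><span/>'
print(pat.sub(r'<\g<tag>\g<arg>></\g<tag>>', markup))
# -> <div class="x"></div><audio src="a.mp3"/><span></span>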

View File

@ -4,6 +4,7 @@
###
Copyright 2011, Kovid Goyal <kovid@kovidgoyal.net>
Released under the GPLv3 License
Based on code originally written by Peter Sorotkin (epubcfi.js)
###
#
log = (error) ->
@ -159,6 +160,63 @@ class CanonicalFragmentIdentifier
point = {}
error = null
offset = null
if (r = cfi.match(/^:(\d+)/)) != null
# Character offset
offset = parseInt(r[1])
cfi = cfi.substr(r[0].length)
if (r = cfi.match(/^~(-?\d+(\.\d+)?)/)) != null
# Temporal offset
point.time = r[1] - 0 # Coerce to number
cfi = cfi.substr(r[0].length)
if (r = cfi.match(/^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)/)) != null
# Spatial offset
point.x = r[1] - 0 # Coerce to number
point.y = r[3] - 0 # Coerce to number
cfi = cfi.substr(r[0].length)
if( (r = cfi.match(/^\[([^\]]+)\]/)) != null )
assertion = r[1]
cfi = cfi.substr(r[0].length)
if (r = assertion.match(/;s=([ab])$/)) != null
if r.index > 0 and assertion[r.index - 1] != '^'
assertion = assertion.substr(0, r.index)
point.forward = (r[1] == 'a')
assertion = unescape_from_cfi(assertion)
# TODO: Handle text assertion
# Find the text node that contains the offset
node?.parentNode?.normalize()
if offset != null
while true
len = node.nodeValue.length
if offset < len or (not point.forward and offset == len)
break
next = false
while true
nn = node.nextSibling
if nn.nodeType in [3, 4, 5, 6] # Text node, entity, cdata
next = nn
break
if not next
if offset > len
error = "Offset out of range: #{ offset }"
offset = len
break
node = next
offset -= len
point.offset = offset
point.node = node
if error
point.error = error
else if cfi.length > 0
point.error = "Undecoded CFI: #{ cfi }"
log(point.error)
point
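
The suffix grammar handled above (":N" character offset, "~T" temporal offset, "@X,Y" spatial offset, and an optional trailing "[assertion;s=a|b]" side bias) is compact enough to sketch in a few lines of Python. This standalone sketch only mirrors the CoffeeScript logic and is not part of calibre:

import re

def parse_cfi_suffix(cfi):
    point = {}
    m = re.match(r'^:(\d+)', cfi)  # character offset
    if m:
        point['offset'] = int(m.group(1))
        cfi = cfi[m.end():]
    m = re.match(r'^~(-?\d+(\.\d+)?)', cfi)  # temporal offset
    if m:
        point['time'] = float(m.group(1))
        cfi = cfi[m.end():]
    m = re.match(r'^@(-?\d+(\.\d+)?),(-?\d+(\.\d+)?)', cfi)  # spatial offset
    if m:
        point['x'], point['y'] = float(m.group(1)), float(m.group(3))
        cfi = cfi[m.end():]
    m = re.match(r'^\[([^\]]+)\]', cfi)  # assertion, possibly with a side bias
    if m:
        assertion = m.group(1)
        cfi = cfi[m.end():]
        s = re.search(r';s=([ab])$', assertion)
        if s:
            point['forward'] = (s.group(1) == 'a')
    point['rest'] = cfi
    return point

print(parse_cfi_suffix(':12~3.5@10,20[chap01;s=a]'))
# -> {'offset': 12, 'time': 3.5, 'x': 10.0, 'y': 20.0, 'forward': True, 'rest': ''}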
@ -192,7 +250,7 @@ class CanonicalFragmentIdentifier
cdoc = cd
cwin = cdoc.defaultView
target.normalize()
(if target.parentNode then target.parentNode else target).normalize()
if name in ['audio', 'video']
tail = "~" + fstr target.currentTime
@ -214,6 +272,67 @@ class CanonicalFragmentIdentifier
this.encode(doc, target, offset, tail)
# }}}
point: (cfi, doc=window?.document) -> # {{{
r = this.decode(cfi, doc)
if not r
return null
node = r.node
ndoc = node.ownerDocument
if not ndoc
log("CFI node has no owner document: #{ cfi } #{ node }")
return null
nwin = ndoc.defaultView
x = null
y = null
if typeof(r.offset) == "number"
# Character offset
range = ndoc.createRange()
if r.forward
try_list = [{start:0, end:0, a:0.5}, {start:0, end:1, a:1}, {start:-1, end:0, a:0}]
else
try_list = [{start:0, end:0, a:0.5}, {start:-1, end:0, a:0}, {start:0, end:1, a:1}]
k = 0
a = null
rects = null
node_len = node.nodeValue.length
until rects or rects.length or k >= try_list.length
t = try_list[k++]
start_offset = r.offset + t.start
end_offset = r.offset + t.end
a = t.a
if start_offset < 0 or end_offset >= node_len
continue
range.setStart(node, start_offset)
range.setEnd(node, end_offset)
rects = range.getClientRects()
if not rects or not rects.length
log("Could not find caret position: rects: #{ rects } offset: #{ r.offset }")
return null
rect = rects[0]
x = (a*rect.left + (1-a)*rect.right)
y = (rect.top + rect.bottom)/2
else
x = node.offsetLeft - nwin.scrollX
y = node.offsetTop - nwin.scrollY
if typeof(r.x) == "number" and node.offsetWidth
x += (r.x*node.offsetWidth)/100
y += (r.y*node.offsetHeight)/100
until ndoc == doc
node = nwin.frameElement
ndoc = node.ownerDocument
nwin = ndoc.defaultView
x += node.offsetLeft - nwin.scrollX
y += node.offsetTop - nwin.scrollY
{x:x, y:y, node:r.node, time:r.time}
# }}}
if window?
window.cfi = new CanonicalFragmentIdentifier()
else if process?

View File

@ -2,9 +2,9 @@
<html>
<head>
<title>Testing CFI functionality</title>
<script type="text/javascript" src="cfi.js"></script>
<script type="text/javascript" src="../cfi.coffee"></script>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="cfi-test.js"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
</head>
<body>
<h1 id="first-h1" style="border: solid 1px red">Testing CFI functionality</h1>

View File

@ -18,8 +18,8 @@ except ImportError:
def run_devel_server():
os.chdir(os.path.dirname(__file__))
serve(['../cfi.coffee', 'cfi-test.coffee'])
os.chdir(os.path.dirname(os.path.abspath(__file__)))
serve()
if __name__ == '__main__':
run_devel_server()

View File

@ -17,6 +17,7 @@ from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)
XHTML_NS = 'http://www.w3.org/1999/xhtml'
XMLNS_NS = 'http://www.w3.org/2000/xmlns/'
class NotHTML(Exception):
@ -28,9 +29,7 @@ def barename(name):
return name.rpartition('}')[-1]
def namespace(name):
if '}' in name:
return name.split('}', 1)[0][1:]
return ''
return name.rpartition('}')[0][1:]
def XHTML(name):
return '{%s}%s' % (XHTML_NS, name)
@ -60,26 +59,86 @@ def merge_multiple_html_heads_and_bodies(root, log=None):
log.warn('Merging multiple <head> and <body> sections')
return root
def _html5_parse(data):
def clone_element(elem, nsmap={}, in_context=True):
if in_context:
maker = elem.getroottree().getroot().makeelement
else:
maker = etree.Element
nelem = maker(elem.tag, attrib=elem.attrib,
nsmap=nsmap)
nelem.text, nelem.tail = elem.text, elem.tail
nelem.extend(elem)
return nelem
def html5_parse(data):
import html5lib
data = html5lib.parse(data, treebuilder='lxml').getroot()
html_ns = [ns for ns, val in data.nsmap.iteritems() if (val == XHTML_NS and
ns is not None)]
if html_ns:
# html5lib causes the XHTML namespace to not
# be set as the default namespace
nsmap = dict(data.nsmap)
nsmap[None] = XHTML_NS
for x in html_ns:
nsmap.pop(x)
nroot = etree.Element(data.tag, nsmap=nsmap,
attrib=dict(data.attrib))
nroot.text = data.text
nroot.tail = data.tail
for child in data:
nroot.append(child)
data = nroot
return data
# Set lang correctly
xl = data.attrib.pop('xmlU0003Alang', None)
if xl is not None and 'lang' not in data.attrib:
data.attrib['lang'] = xl
# html5lib has the most inelegant handling of namespaces I have ever seen
# Try to reconstitute destroyed namespace info
xmlns_declaration = '{%s}'%XMLNS_NS
non_html5_namespaces = {}
seen_namespaces = set()
for elem in tuple(data.iter()):
elem.attrib.pop('xmlns', None)
namespaces = {}
for x in tuple(elem.attrib):
if x.startswith('xmlnsU') or x.startswith(xmlns_declaration):
# A namespace declaration
val = elem.attrib.pop(x)
if x.startswith('xmlnsU0003A'):
prefix = x[11:]
namespaces[prefix] = val
if namespaces:
# Some destroyed namespace declarations were found
p = elem.getparent()
if p is None:
# We handle the root node later
non_html5_namespaces = namespaces
else:
idx = p.index(elem)
p.remove(elem)
elem = clone_element(elem, nsmap=namespaces)
p.insert(idx, elem)
b = barename(elem.tag)
idx = b.find('U0003A')
if idx > -1:
prefix, tag = b[:idx], b[idx+6:]
ns = elem.nsmap.get(prefix, None)
if ns is None:
ns = non_html5_namespaces.get(prefix, None)
if ns is not None:
elem.tag = '{%s}%s'%(ns, tag)
for b in tuple(elem.attrib):
idx = b.find('U0003A')
if idx > -1:
prefix, tag = b[:idx], b[idx+6:]
ns = elem.nsmap.get(prefix, None)
if ns is None:
ns = non_html5_namespaces.get(prefix, None)
if ns is not None:
elem.attrib['{%s}%s'%(ns, tag)] = elem.attrib.pop(b)
seen_namespaces |= set(elem.nsmap.itervalues())
nsmap = dict(html5lib.constants.namespaces)
nsmap[None] = nsmap.pop('html')
non_html5_namespaces.update(nsmap)
nsmap = non_html5_namespaces
data = clone_element(data, nsmap=nsmap, in_context=False)
# Remove unused namespace declarations
fnsmap = {k:v for k,v in nsmap.iteritems() if v in seen_namespaces and v !=
XMLNS_NS}
return clone_element(data, nsmap=fnsmap, in_context=False)
def _html4_parse(data, prefer_soup=False):
if prefer_soup:
@ -177,7 +236,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
except etree.XMLSyntaxError:
log.debug('Parsing %s as HTML' % filename)
try:
data = _html5_parse(data)
data = html5_parse(data)
except:
log.exception(
'HTML 5 parsing failed, falling back to older parsers')
@ -261,6 +320,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
nroot.append(elem)
data = nroot
data = merge_multiple_html_heads_and_bodies(data, log)
# Ensure has a <head/>
head = xpath(data, '/h:html/h:head')

View File

@ -159,15 +159,18 @@ class FlatFilenames(object): # {{{
continue
data = item.data
isp = item.spine_position
nhref = oeb.manifest.generate(href=nhref)[1]
if isp is not None:
oeb.spine.remove(item)
oeb.manifest.remove(item)
nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
fallback=item.fallback)
self.rename_map[item.href] = nhref
self.renamed_items_map[nhref] = item
if item.spine_position is not None:
oeb.spine.insert(item.spine_position, nitem, item.linear)
oeb.spine.remove(item)
oeb.manifest.remove(item)
if isp is not None:
oeb.spine.insert(isp, nitem, item.linear)
if self.rename_map:
self.log('Found non-flat filenames, renaming to support broken'

View File

@ -16,7 +16,7 @@ class UnsmartenPunctuation(object):
def unsmarten(self, root):
for x in self.html_tags(root):
if not barename(x) == 'pre':
if not barename(x.tag) == 'pre':
if getattr(x, 'text', None):
x.text = unsmarten_text(x.text)
if getattr(x, 'tail', None) and x.tail:

View File

@ -273,11 +273,34 @@ def error_dialog(parent, title, msg, det_msg='', show=False,
return d
def question_dialog(parent, title, msg, det_msg='', show_copy_button=False,
default_yes=True):
default_yes=True,
# Skippable dialogs
# Set skip_dialog_name to a unique name for this dialog
# Set skip_dialog_msg to a message displayed to the user
skip_dialog_name=None, skip_dialog_msg=_('Show this confirmation again'),
skip_dialog_skipped_value=True, skip_dialog_skip_precheck=True):
from calibre.gui2.dialogs.message_box import MessageBox
auto_skip = set(gprefs.get('questions_to_auto_skip', []))
if (skip_dialog_name is not None and skip_dialog_name in auto_skip):
return bool(skip_dialog_skipped_value)
d = MessageBox(MessageBox.QUESTION, title, msg, det_msg, parent=parent,
show_copy_button=show_copy_button, default_yes=default_yes)
return d.exec_() == d.Accepted
if skip_dialog_name is not None and skip_dialog_msg:
tc = d.toggle_checkbox
tc.setVisible(True)
tc.setText(skip_dialog_msg)
tc.setChecked(bool(skip_dialog_skip_precheck))
ret = d.exec_() == d.Accepted
if skip_dialog_name is not None and not d.toggle_checkbox.isChecked():
auto_skip.add(skip_dialog_name)
gprefs.set('questions_to_auto_skip', list(auto_skip))
return ret
def info_dialog(parent, title, msg, det_msg='', show=False,
show_copy_button=True):

View File

@ -683,7 +683,7 @@ class DeviceMixin(object): # {{{
return self.ask_a_yes_no_question(
_('No suitable formats'), msg,
ans_when_user_unavailable=True,
det_msg=autos
det_msg=autos, skip_dialog_name='auto_convert_before_send'
)
def set_default_thumbnail(self, height):

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import QDialog, QApplication
from calibre.gui2.dialogs.add_from_isbn_ui import Ui_Dialog
from calibre.ebooks.metadata import check_isbn
from calibre.constants import iswindows
from calibre.gui2 import gprefs
from calibre.gui2 import gprefs, question_dialog, error_dialog
class AddFromISBN(QDialog, Ui_Dialog):
@ -44,6 +44,7 @@ class AddFromISBN(QDialog, Ui_Dialog):
tags = list(filter(None, [x.strip() for x in tags]))
gprefs['add from ISBN tags'] = tags
self.set_tags = tags
bad = set()
for line in unicode(self.isbn_box.toPlainText()).strip().splitlines():
line = line.strip()
if not line:
@ -64,5 +65,19 @@ class AddFromISBN(QDialog, Ui_Dialog):
os.access(parts[1], os.R_OK) and os.path.isfile(parts[1]):
book['path'] = parts[1]
self.books.append(book)
else:
bad.add(parts[0])
if bad:
if self.books:
if not question_dialog(self, _('Some invalid ISBNs'),
_('Some of the ISBNs you entered were invalid. They will'
' be ignored. Click Show Details to see which ones.'
' Do you want to proceed?'), det_msg='\n'.join(bad),
show_copy_button=True):
return
else:
return error_dialog(self, _('All invalid ISBNs'),
_('All the ISBNs you entered were invalid. No books'
' can be added.'), show=True)
QDialog.accept(self, *args)

View File

@ -44,6 +44,7 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
self.msg.setText(msg)
self.det_msg.setPlainText(det_msg)
self.det_msg.setVisible(False)
self.toggle_checkbox.setVisible(False)
if show_copy_button:
self.ctc_button = self.bb.addButton(_('&Copy to clipboard'),

View File

@ -53,7 +53,7 @@
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<item row="3" column="0" colspan="2">
<widget class="QDialogButtonBox" name="bb">
<property name="orientation">
<enum>Qt::Horizontal</enum>
@ -63,6 +63,13 @@
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="toggle_checkbox">
<property name="text">
<string/>
</property>
</widget>
</item>
</layout>
</widget>
<resources>

View File

@ -143,12 +143,12 @@ class GuiRunner(QObject):
add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
sys.excepthook = main.unhandled_exception
if len(self.args) > 1:
p = os.path.abspath(self.args[1])
if os.path.isdir(p):
prints('Ignoring directory passed as command line argument:',
self.args[1])
else:
add_filesystem_book(p)
files = [os.path.abspath(p) for p in self.args[1:] if not
os.path.isdir(p)]
if len(files) < len(sys.argv[1:]):
prints('Ignoring directories passed as command line arguments')
if files:
add_filesystem_book(files)
self.app.file_event_hook = add_filesystem_book
self.main = main

View File

@ -162,6 +162,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
for key in dynamic.keys():
if key.endswith('_again') and dynamic[key] is False:
dynamic[key] = True
gprefs['questions_to_auto_skip'] = []
info_dialog(self, _('Done'),
_('Confirmation dialogs have all been reset'), show=True)

View File

@ -260,11 +260,11 @@ class ShortcutConfig(QWidget):
self.view.setModel(model)
self.delegate = Delegate()
self.view.setItemDelegate(self.delegate)
self.delegate.sizeHintChanged.connect(self.scrollTo)
self.delegate.sizeHintChanged.connect(self.scrollTo,
type=Qt.QueuedConnection)
def scrollTo(self, index):
self.view.scrollTo(index)
self.view.scrollTo(index, self.view.EnsureVisible)
@property
def is_editing(self):

View File

@ -54,12 +54,12 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
if not id:
continue
cover_url = ''.join(data.xpath('.//img/@src'))
cover_url = ''.join(data.xpath('.//div[@class="info"]/h3/a/@id'))
title = ''.join(data.xpath('.//div[@class="info"]/h3/a/@title'))
formats = title.split()
formats = formats[-1]
formats = ''.join(data.xpath('.//div[@class="info"]/p[1]/text()'))
formats = re.findall(r'\((.*?)\)',formats)[0]
author = ''.join(data.xpath('.//div[@class="info"]/h4/text() | .//div[@class="info"]/h4/span/text()'))
price = ''.join(data.xpath('.//h3[@class="promocja"]/text()'))
price = ''.join(data.xpath('.//div[@class="options"]/h3/text()'))
price = re.sub('PLN', '', price)
price = re.sub('\.', ',', price)
drm = data.xpath('boolean(.//div[@class="info" and contains(., "Zabezpieczenie: DRM")])')
@ -67,7 +67,7 @@ class GandalfStore(BasicStoreConfig, StorePlugin):
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.cover_url = 'http://imguser.gandalf.com.pl/' + re.sub('p', 'p_', cover_url) + '.jpg'
s.title = title.strip()
s.author = author.strip()
s.price = price

View File

@ -407,11 +407,14 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
return getattr(self, '__systray_minimized', False)
def ask_a_yes_no_question(self, title, msg, det_msg='',
show_copy_button=False, ans_when_user_unavailable=True):
show_copy_button=False, ans_when_user_unavailable=True,
skip_dialog_name=None, skipped_value=True):
if self.is_minimized_to_tray:
return ans_when_user_unavailable
return question_dialog(self, title, msg, det_msg=det_msg,
show_copy_button=show_copy_button)
show_copy_button=show_copy_button,
skip_dialog_name=skip_dialog_name,
skip_dialog_skipped_value=skipped_value)
def hide_windows(self):
for window in QApplication.topLevelWidgets():

View File

@ -269,7 +269,7 @@ How do I use |app| with my Android phone/tablet?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are two ways that you can connect your Android device to calibre: using a USB cable, or wirelessly, over the air.
**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**.
**The USB cable method only works if your Android device can act as a USB disk, that means in windows it must have a drive letter, like K:**. Some devices may have a setting to put them in "disk mode" or "USB Transfer mode" that is needed before they act as USB disks.
Using a USB cable
^^^^^^^^^^^^^^^^^^^^

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -4,9 +4,9 @@
#
msgid ""
msgstr ""
"Project-Id-Version: calibre 0.8.31\n"
"POT-Creation-Date: 2011-12-16 09:38+IST\n"
"PO-Revision-Date: 2011-12-16 09:38+IST\n"
"Project-Id-Version: calibre 0.8.32\n"
"POT-Creation-Date: 2011-12-23 08:40+IST\n"
"PO-Revision-Date: 2011-12-23 08:40+IST\n"
"Last-Translator: Automatically generated\n"
"Language-Team: LANGUAGE\n"
"MIME-Version: 1.0\n"
@ -24,8 +24,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/db/cache.py:105
#: /home/kovid/work/calibre/src/calibre/db/cache.py:108
#: /home/kovid/work/calibre/src/calibre/db/cache.py:119
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:282
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:283
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:285
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:286
#: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:99
#: /home/kovid/work/calibre/src/calibre/devices/hanvon/driver.py:100
#: /home/kovid/work/calibre/src/calibre/devices/jetbook/driver.py:74
@ -36,8 +36,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:71
#: /home/kovid/work/calibre/src/calibre/devices/prs500/books.py:267
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:660
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:328
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:329
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:330
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:331
#: /home/kovid/work/calibre/src/calibre/devices/usbms/driver.py:485
#: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:106
#: /home/kovid/work/calibre/src/calibre/ebooks/chm/input.py:109
@ -97,17 +97,17 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:85
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:128
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:169
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:748
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1005
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1007
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1009
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:749
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1006
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1008
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:1010
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/utils.py:299
#: /home/kovid/work/calibre/src/calibre/ebooks/mobi/writer2/indexer.py:497
#: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:145
#: /home/kovid/work/calibre/src/calibre/ebooks/odt/input.py:147
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:818
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:273
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:277
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:333
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/parse_utils.py:337
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:142
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/reader.py:149
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/jacket.py:66
@ -137,8 +137,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/pdf/writer.py:103
#: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:320
#: /home/kovid/work/calibre/src/calibre/ebooks/rtf/input.py:322
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:387
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:395
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:410
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:418
#: /home/kovid/work/calibre/src/calibre/gui2/actions/add.py:157
#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:378
#: /home/kovid/work/calibre/src/calibre/gui2/actions/edit_metadata.py:381
@ -889,15 +889,15 @@ msgstr ""
msgid "Communicate with Android phones."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:152
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:154
msgid "Comma separated list of directories to send e-books to on the device. The first one that exists will be used"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:222
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:225
msgid "Communicate with S60 phones."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:241
#: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:244
msgid "Communicate with WebOS tablets."
msgstr ""
@ -993,8 +993,8 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/devices/nook/driver.py:102
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:447
#: /home/kovid/work/calibre/src/calibre/devices/prs505/sony_cache.py:470
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:547
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:566
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:549
#: /home/kovid/work/calibre/src/calibre/devices/prst1/driver.py:568
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1052
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1058
#: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:1093
@ -2036,7 +2036,7 @@ msgid "When creating a TOC from links in the input document, allow duplicate ent
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:281
msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\" or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature."
msgid "An XPath expression to detect chapter titles. The default is to consider <h1> or <h2> tags that contain the words \"chapter\",\"book\",\"section\", \"prologue\", \"epilogue\", or \"part\" as chapter titles as well as any tags that have class=\"chapter\". The expression used must evaluate to a list of elements. To disable chapter detection, use the expression \"/\". See the XPath Tutorial in the calibre User Manual for further help on using this feature."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/conversion/plumber.py:295
@ -2934,7 +2934,7 @@ msgid ""
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1417
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1238
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:943
#: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:41
msgid "Cover"
@ -3085,70 +3085,70 @@ msgstr ""
msgid "No details available"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1239
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245
msgid "Title Page"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1240
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/transforms/htmltoc.py:15
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main.py:56
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:199
msgid "Table of Contents"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1241
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247
msgid "Index"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1242
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248
msgid "Glossary"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1243
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249
msgid "Acknowledgements"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1244
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250
msgid "Bibliography"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1245
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251
msgid "Colophon"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1246
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252
msgid "Copyright"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1247
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253
msgid "Dedication"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1248
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254
msgid "Epigraph"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1249
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1255
msgid "Foreword"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1250
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1256
msgid "List of Illustrations"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1251
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1257
msgid "List of Tables"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1252
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1258
msgid "Notes"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1253
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1259
msgid "Preface"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1254
#: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1260
msgid "Main Text"
msgstr ""
@ -3681,7 +3681,11 @@ msgstr ""
msgid "tag browser categories not to display"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:506
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:280
msgid "Show this confirmation again"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/__init__.py:529
msgid "Choose Files"
msgstr ""
@ -4186,7 +4190,7 @@ msgid "Create a catalog of the books in your calibre library"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/actions/convert.py:88
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:591
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:594
msgid "Cannot convert"
msgstr ""
@ -6643,7 +6647,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/convert/single_ui.py:117
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/book_info_ui.py:69
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/comicconf_ui.py:96
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:52
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box_ui.py:21
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/progress_ui.py:53
#: /home/kovid/work/calibre/src/calibre/gui2/store/mobileread_store_dialog_ui.py:61
#: /home/kovid/work/calibre/src/calibre/gui2/store/stores/mobileread/cache_progress_dialog_ui.py:50
@ -7279,6 +7283,22 @@ msgstr ""
msgid "Reset author to Unknown"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:72
msgid "Some invalid ISBNs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:73
msgid "Some of the ISBNs you entered were invalid. They will be ignored. Click Show Details to see which ones. Do you want to proceed?"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:79
msgid "All invalid ISBNs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn.py:80
msgid "All the ISBNs you entered were invalid. No books can be added."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/add_from_isbn_ui.py:63
msgid "Add books by ISBN"
msgstr ""
@ -7920,35 +7940,35 @@ msgstr ""
msgid "&Hide all jobs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:49
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:50
msgid "&Copy to clipboard"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:53
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
msgid "Show &details"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:54
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:55
msgid "Hide &details"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:58
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:59
msgid "Show detailed information about this error"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:100
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:101
#: /home/kovid/work/calibre/src/calibre/gui2/wizard/__init__.py:552
msgid "Copied"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:138
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:139
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:796
#: /home/kovid/work/calibre/src/calibre/gui2/viewer/main_ui.py:205
msgid "Copy to clipboard"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:184
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:232
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:185
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/message_box.py:233
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:860
#: /home/kovid/work/calibre/src/calibre/gui2/metadata/single_download.py:961
msgid "View log"
@ -10063,7 +10083,7 @@ msgid "None"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/keyboard.py:389
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:165
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
msgid "Done"
msgstr ""
@ -10404,7 +10424,7 @@ msgid "Failed to create calibre library at: %r."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:108
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:170
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:169
msgid "Choose a location for your new calibre e-book library"
msgstr ""
@ -10412,74 +10432,74 @@ msgstr ""
msgid "Initializing user interface..."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:163
msgid "Repairing failed"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:165
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:164
msgid "The database repair failed. Starting with a new empty library."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:178
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:203
msgid "Bad database location"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:180
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:179
#, python-format
msgid "Bad database location %r. calibre will now quit."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:494
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:191
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:497
msgid "Corrupted database"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:193
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:192
#, python-format
msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful. If you say No, a new empty calibre library will be created."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:205
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:204
#, python-format
msgid "Bad database location %r. Will start with a new, empty calibre library"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:215
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:214
#, python-format
msgid "Starting %s: Loading books..."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:295
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:294
msgid "If you are sure it is not running"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:298
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:297
msgid "may be running in the system tray, in the"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:300
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:299
msgid "upper right region of the screen."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:302
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:301
msgid "lower right region of the screen."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:305
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:304
msgid "try rebooting your computer."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:307
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:321
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:306
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:320
msgid "try deleting the file"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:309
msgid "Cannot Start "
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:311
#: /home/kovid/work/calibre/src/calibre/gui2/main.py:310
#, python-format
msgid "%s is already running."
msgstr ""
@ -11097,7 +11117,7 @@ msgstr ""
msgid "All on 1 tab"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:166
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/behavior.py:167
msgid "Confirmation dialogs have all been reset"
msgstr ""
@ -12623,7 +12643,7 @@ msgid "Here you can control how calibre will save your books when you click the
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/preferences/server.py:70
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:432
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:435
msgid "Failed to start content server"
msgstr ""
@ -13791,39 +13811,39 @@ msgstr ""
msgid "You have started calibre in debug mode. After you quit calibre, the debug log will be available in the file: %s<p>The log will be displayed automatically."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:495
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:498
#, python-format
msgid "The library database at %s appears to be corrupted. Do you want calibre to try and rebuild it automatically? The rebuild may not be completely successful."
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:579
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:582
msgid "Conversion Error"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:602
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:605
msgid "Recipe Disabled"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:618
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:621
msgid "<b>Failed</b>"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:652
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
msgid "There are active jobs. Are you sure you want to quit?"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:655
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:658
msgid ""
" is communicating with the device!<br>\n"
" Quitting may cause corruption on the device.<br>\n"
" Are you sure you want to quit?"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:659
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:662
msgid "Active jobs"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:727
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:730
msgid "will keep running in the system tray. To close it, choose <b>Quit</b> in the context menu of the system tray."
msgstr ""

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff