diff --git a/recipes/arcamax.recipe b/recipes/arcamax.recipe
index db4d753cef..d1c1c6766d 100644
--- a/recipes/arcamax.recipe
+++ b/recipes/arcamax.recipe
@@ -93,7 +93,7 @@ class Arcamax(BasicNewsRecipe):
for page in pages:
page_soup = self.index_to_soup(url)
if page_soup:
- title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
+ title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
page_url = url
# orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
@@ -127,4 +127,3 @@ class Arcamax(BasicNewsRecipe):
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
-
diff --git a/recipes/china_times.recipe b/recipes/china_times.recipe
new file mode 100644
index 0000000000..8c1493d71f
--- /dev/null
+++ b/recipes/china_times.recipe
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL v3'
+# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1277443634(BasicNewsRecipe):
+ title = u'中時電子報'
+ oldest_article = 1
+ max_articles_per_feed = 100
+
+ feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),
+ (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),
+ (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),
+ (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),
+ (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),
+ (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),
+ (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),
+ (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),
+ (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),
+ (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),
+ #(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'),
+ #(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links
+ #(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links
+ ]
+
+ __author__ = 'einstuerzende, updated by Eddie Lau'
+ __version__ = '1.0'
+ language = 'zh'
+ publisher = 'China Times Group'
+ description = 'China Times (Taiwan)'
+ category = 'News, Chinese, Taiwan'
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ encoding = 'big5'
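+    # linearize table-based article layouts into flowing text during conversion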
+ conversion_options = {'linearize_tables':True}
+ masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
+ cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
+ keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})]
+ remove_tags = [dict(name='div', attrs={'class':['focus-news']})]
+
diff --git a/recipes/divahair.recipe b/recipes/divahair.recipe
new file mode 100644
index 0000000000..978ac19808
--- /dev/null
+++ b/recipes/divahair.recipe
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+divahair.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DivaHair(BasicNewsRecipe):
+ title = u'Diva Hair'
+ language = 'ro'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Coafuri, frizuri, tunsori ..'
+ publisher = u'Diva Hair'
+ category = u'Ziare,Stiri,Coafuri,Femei'
+ oldest_article = 5
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ encoding = 'utf-8'
+ remove_javascript = True
+ cover_url = 'http://www.divahair.ro/imgs/logo.jpg'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [
+ dict(name='td', attrs={'class':'spatiuart'})
+ , dict(name='div', attrs={'class':'spatiuart'})
+ ]
+
+
+ remove_tags = [
+ dict(name='div', attrs={'class':'categorie'})
+ , dict(name='div', attrs={'class':'gri gri2 detaliiart'})
+ , dict(name='div', attrs={'class':'articol_box_bottom'})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'class':'articol_box_bottom'})
+ ]
+
+ feeds = [ (u'\u0218tiri', u'http://www.divahair.ro/feed') ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/recipes/icons/divahair.png b/recipes/icons/divahair.png
new file mode 100644
index 0000000000..4cb2964687
Binary files /dev/null and b/recipes/icons/divahair.png differ
diff --git a/recipes/icons/mayra.png b/recipes/icons/mayra.png
new file mode 100644
index 0000000000..bbc0c8d259
Binary files /dev/null and b/recipes/icons/mayra.png differ
diff --git a/recipes/icons/moldovaazi.png b/recipes/icons/moldovaazi.png
new file mode 100644
index 0000000000..1fc3532295
Binary files /dev/null and b/recipes/icons/moldovaazi.png differ
diff --git a/recipes/icons/newsmoldova.png b/recipes/icons/newsmoldova.png
new file mode 100644
index 0000000000..a4faaa4635
Binary files /dev/null and b/recipes/icons/newsmoldova.png differ
diff --git a/recipes/liberty_times.recipe b/recipes/liberty_times.recipe
new file mode 100644
index 0000000000..c3a9d106da
--- /dev/null
+++ b/recipes/liberty_times.recipe
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL v3'
+# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
+
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1277443634(BasicNewsRecipe):
+ title = u'自由電子報'
+ oldest_article = 1
+ max_articles_per_feed = 100
+
+ feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),
+ (u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),
+ (u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),
+ (u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),
+ (u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),
+ (u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),
+ (u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),
+ (u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),
+ (u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),
+ (u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),
+ (u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),
+ (u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),
+ (u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),
+ (u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),
+ (u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),
+ ]
+ extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
+ __author__ = 'einstuerzende, updated by Eddie Lau'
+ __version__ = '1.1'
+ language = 'zh'
+ publisher = 'Liberty Times Group'
+ description = 'Liberty Times (Taiwan)'
+ category = 'News, Chinese, Taiwan'
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ encoding = 'big5'
+ conversion_options = {'linearize_tables':True}
+ masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
+ cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
+ keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})]
+
diff --git a/recipes/mayra.recipe b/recipes/mayra.recipe
new file mode 100644
index 0000000000..ae449d819d
--- /dev/null
+++ b/recipes/mayra.recipe
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+mayra.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Mayra(BasicNewsRecipe):
+ title = u'Mayra'
+ language = 'ro'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Traieste urban, cool, sexy'
+ publisher = 'Mayra'
+ category = 'Ziare,Stiri,Reviste'
+ oldest_article = 5
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ encoding = 'utf-8'
+ remove_javascript = True
+ cover_url = 'http://img.konkurs.ro/img/concursuri-cu-premii/147/14672_front.jpg'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [
+ dict(name='div', attrs={'id':'article_details'})
+ ]
+
+ remove_tags = [
+ dict(name='div', attrs={'id':'LikePluginPagelet'})
+ , dict(name='p', attrs={'id':'tags'})
+ , dict(name='span', attrs={'id':'tweet-button'})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'id':'LikePluginPagelet'})
+ ]
+
+ feeds = [ (u'\u0218tiri', u'http://www.mayra.ro/rss') ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/recipes/ming_pao.recipe b/recipes/ming_pao.recipe
index 4a405a59dd..08ee20cb15 100644
--- a/recipes/ming_pao.recipe
+++ b/recipes/ming_pao.recipe
@@ -1,15 +1,18 @@
+# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'
-# Users of Kindle 3 (with limited system-level CJK support)
+# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
-# Turn it to True if your device supports display of CJK titles
+# Turn the following to True if your device supports display of CJK titles
__UseChineseTitle__ = False
-
+# Turn the following to True if you wish to use life.mingpao.com as the main article source
+__UseLife__ = True
'''
Change Log:
+2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
@@ -32,41 +35,43 @@ import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
+
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
class MPHKRecipe(BasicNewsRecipe):
- title = 'Ming Pao - Hong Kong'
- oldest_article = 1
- max_articles_per_feed = 100
- __author__ = 'Eddie Lau'
- description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
- publisher = 'MingPao'
- category = 'Chinese, News, Hong Kong'
- remove_javascript = True
- use_embedded_content = False
- no_stylesheets = True
- language = 'zh'
- encoding = 'Big5-HKSCS'
- recursions = 0
- conversion_options = {'linearize_tables':True}
- timefmt = ''
- extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
- masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
- keep_only_tags = [dict(name='h1'),
+ title = 'Ming Pao - Hong Kong'
+ oldest_article = 1
+ max_articles_per_feed = 100
+ __author__ = 'Eddie Lau'
+ description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
+ publisher = 'MingPao'
+ category = 'Chinese, News, Hong Kong'
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ language = 'zh'
+ encoding = 'Big5-HKSCS'
+ recursions = 0
+ conversion_options = {'linearize_tables':True}
+ timefmt = ''
+ extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
+ masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
+ keep_only_tags = [dict(name='h1'),
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
dict(attrs={'id':['newscontent']}), # entertainment and column page content
dict(attrs={'id':['newscontent01','newscontent02']}),
- dict(attrs={'class':['photo']})
+ dict(attrs={'class':['photo']}),
+ dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
]
- remove_tags = [dict(name='style'),
- dict(attrs={'id':['newscontent135']}), # for the finance page
- dict(name='table')] # for content fetched from life.mingpao.com
- remove_attributes = ['width']
- preprocess_regexps = [
+ remove_tags = [dict(name='style'),
+ dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
+ dict(name='table')] # for content fetched from life.mingpao.com
+ remove_attributes = ['width']
+ preprocess_regexps = [
(re.compile(r'
', re.DOTALL|re.IGNORECASE),
lambda match: ''),
(re.compile(r'
', re.DOTALL|re.IGNORECASE),
@@ -80,10 +85,10 @@ class MPHKRecipe(BasicNewsRecipe):
lambda match: "")
]
- def image_url_processor(cls, baseurl, url):
- # trick: break the url at the first occurance of digit, add an additional
- # '_' at the front
- # not working, may need to move this to preprocess_html() method
+ def image_url_processor(cls, baseurl, url):
+    # trick: break the url at the first occurrence of a digit, add an additional
+ # '_' at the front
+ # not working, may need to move this to preprocess_html() method
# minIdx = 10000
# i0 = url.find('0')
# if i0 >= 0 and i0 < minIdx:
@@ -115,314 +120,357 @@ class MPHKRecipe(BasicNewsRecipe):
# i9 = url.find('9')
# if i9 >= 0 and i9 < minIdx:
# minIdx = i9
- return url
+ return url
- def get_dtlocal(self):
- dt_utc = datetime.datetime.utcnow()
- # convert UTC to local hk time - at around HKT 6.00am, all news are available
- dt_local = dt_utc - datetime.timedelta(-2.0/24)
- return dt_local
+ def get_dtlocal(self):
+ dt_utc = datetime.datetime.utcnow()
+ # convert UTC to local hk time - at around HKT 6.00am, all news are available
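+        # subtracting timedelta(-2.0/24) adds 2 hours: with a UTC+2 clock the date only advances at 22:00 UTC, i.e. 6:00am HKT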
+ dt_local = dt_utc - datetime.timedelta(-2.0/24)
+ return dt_local
- def get_fetchdate(self):
- return self.get_dtlocal().strftime("%Y%m%d")
+ def get_fetchdate(self):
+ return self.get_dtlocal().strftime("%Y%m%d")
- def get_fetchformatteddate(self):
- return self.get_dtlocal().strftime("%Y-%m-%d")
+ def get_fetchformatteddate(self):
+ return self.get_dtlocal().strftime("%Y-%m-%d")
- def get_fetchday(self):
- # convert UTC to local hk time - at around HKT 6.00am, all news are available
- return self.get_dtlocal().strftime("%d")
+ def get_fetchday(self):
+ # dt_utc = datetime.datetime.utcnow()
+ # convert UTC to local hk time - at around HKT 6.00am, all news are available
+ # dt_local = dt_utc - datetime.timedelta(-2.0/24)
+ return self.get_dtlocal().strftime("%d")
- def get_cover_url(self):
- cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
- br = BasicNewsRecipe.get_browser()
- try:
- br.open(cover)
- except:
- cover = None
- return cover
+ def get_cover_url(self):
+ cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
+ br = BasicNewsRecipe.get_browser()
+ try:
+ br.open(cover)
+ except:
+ cover = None
+ return cover
- def parse_index(self):
- feeds = []
- dateStr = self.get_fetchdate()
+ def parse_index(self):
+ feeds = []
+ dateStr = self.get_fetchdate()
- for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
- (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
- (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
- articles = self.parse_section(url)
- if articles:
- feeds.append((title, articles))
+ if __UseLife__:
+ for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
+ (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
+ (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
+ (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
+ (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
+ (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
+ (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
+ (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
+ (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
+ (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
+ (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
+ articles = self.parse_section2(url, keystr)
+ if articles:
+ feeds.append((title, articles))
- # special- editorial
- ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
- if ed_articles:
- feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+ for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+ (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+ else:
+ for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
+ (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
+ (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
- for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
- (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
- (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
- articles = self.parse_section(url)
- if articles:
- feeds.append((title, articles))
+ # special- editorial
+ ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+ if ed_articles:
+ feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
- # special - finance
- #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
- fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
- if fin_articles:
- feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+ for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
+ (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
+ (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
- for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
- (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
- articles = self.parse_section(url)
- if articles:
- feeds.append((title, articles))
+ # special - finance
+ #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
+ fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+ if fin_articles:
+ feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
- # special - entertainment
- ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
- if ent_articles:
- feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+ for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+ (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
- for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
- (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
- articles = self.parse_section(url)
- if articles:
- feeds.append((title, articles))
+ # special - entertainment
+ ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+ if ent_articles:
+ feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+
+ for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
+ (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
- # special- columns
- col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
- if col_articles:
- feeds.append((u'\u5c08\u6b04 Columns', col_articles))
+ # special- columns
+ col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
+ if col_articles:
+ feeds.append((u'\u5c08\u6b04 Columns', col_articles))
- return feeds
+ return feeds
- def parse_section(self, url):
- dateStr = self.get_fetchdate()
- soup = self.index_to_soup(url)
- divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
- current_articles = []
- included_urls = []
- divs.reverse()
- for i in divs:
- a = i.find('a', href = True)
- title = self.tag_to_string(a)
- url = a.get('href', False)
- url = 'http://news.mingpao.com/' + dateStr + '/' +url
- if url not in included_urls and url.rfind('Redirect') == -1:
- current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
+ # parse from news.mingpao.com
+ def parse_section(self, url):
+ dateStr = self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
+ current_articles = []
+ included_urls = []
+ divs.reverse()
+ for i in divs:
+ a = i.find('a', href = True)
+ title = self.tag_to_string(a)
+ url = a.get('href', False)
+ url = 'http://news.mingpao.com/' + dateStr + '/' +url
+ if url not in included_urls and url.rfind('Redirect') == -1:
+ current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- def parse_ed_section(self, url):
- self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href=True)
- a.reverse()
- current_articles = []
- included_urls = []
- for i in a:
- title = self.tag_to_string(i)
- url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
- if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
- current_articles.append({'title': title, 'url': url, 'description': ''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
+ # parse from life.mingpao.com
+ def parse_section2(self, url, keystr):
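+        # keystr ('nal' for news pages, 'ncl' for column pages, per parse_index) filters the article links collected below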
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- def parse_fin_section(self, url):
- self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href= True)
- current_articles = []
- included_urls = []
- for i in a:
- #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
- url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
- #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
- if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
- title = self.tag_to_string(i)
- current_articles.append({'title': title, 'url': url, 'description':''})
- included_urls.append(url)
- return current_articles
+ def parse_ed_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- def parse_ent_section(self, url):
- self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href=True)
- a.reverse()
- current_articles = []
- included_urls = []
- for i in a:
- title = self.tag_to_string(i)
- url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
- if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
- current_articles.append({'title': title, 'url': url, 'description': ''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
+ def parse_fin_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href= True)
+ current_articles = []
+ included_urls = []
+ for i in a:
+ #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
+ if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
+ title = self.tag_to_string(i)
+ current_articles.append({'title': title, 'url': url, 'description':''})
+ included_urls.append(url)
+ return current_articles
- def parse_col_section(self, url):
- self.get_fetchdate()
- soup = self.index_to_soup(url)
- a = soup.findAll('a', href=True)
- a.reverse()
- current_articles = []
- included_urls = []
- for i in a:
- title = self.tag_to_string(i)
- url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
- if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
- current_articles.append({'title': title, 'url': url, 'description': ''})
- included_urls.append(url)
- current_articles.reverse()
- return current_articles
+ def parse_ent_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- def preprocess_html(self, soup):
- for item in soup.findAll(style=True):
- del item['style']
- for item in soup.findAll(style=True):
- del item['width']
- for item in soup.findAll(stype=True):
- del item['absmiddle']
- return soup
+ def parse_col_section(self, url):
+ self.get_fetchdate()
+ soup = self.index_to_soup(url)
+ a = soup.findAll('a', href=True)
+ a.reverse()
+ current_articles = []
+ included_urls = []
+ for i in a:
+ title = self.tag_to_string(i)
+ url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
+ if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
+ current_articles.append({'title': title, 'url': url, 'description': ''})
+ included_urls.append(url)
+ current_articles.reverse()
+ return current_articles
- def create_opf(self, feeds, dir=None):
- if dir is None:
- dir = self.output_dir
- if __UseChineseTitle__ == True:
- title = u'\u660e\u5831 (\u9999\u6e2f)'
- else:
- title = self.short_title()
- # if not generating a periodical, force date to apply in title
- if __MakePeriodical__ == False:
- title = title + ' ' + self.get_fetchformatteddate()
- if True:
- mi = MetaInformation(title, [self.publisher])
- mi.publisher = self.publisher
- mi.author_sort = self.publisher
- if __MakePeriodical__ == True:
- mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
- else:
- mi.publication_type = self.publication_type+':'+self.short_title()
- #mi.timestamp = nowf()
- mi.timestamp = self.get_dtlocal()
- mi.comments = self.description
- if not isinstance(mi.comments, unicode):
- mi.comments = mi.comments.decode('utf-8', 'replace')
- #mi.pubdate = nowf()
- mi.pubdate = self.get_dtlocal()
- opf_path = os.path.join(dir, 'index.opf')
- ncx_path = os.path.join(dir, 'index.ncx')
- opf = OPFCreator(dir, mi)
- # Add mastheadImage entry to section
- mp = getattr(self, 'masthead_path', None)
- if mp is not None and os.access(mp, os.R_OK):
- from calibre.ebooks.metadata.opf2 import Guide
- ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
- ref.type = 'masthead'
- ref.title = 'Masthead Image'
- opf.guide.append(ref)
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ for item in soup.findAll(style=True):
+ del item['width']
+ for item in soup.findAll(stype=True):
+ del item['absmiddle']
+ return soup
- manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
- manifest.append(os.path.join(dir, 'index.html'))
- manifest.append(os.path.join(dir, 'index.ncx'))
+ def create_opf(self, feeds, dir=None):
+ if dir is None:
+ dir = self.output_dir
+ if __UseChineseTitle__ == True:
+ title = u'\u660e\u5831 (\u9999\u6e2f)'
+ else:
+ title = self.short_title()
+ # if not generating a periodical, force date to apply in title
+ if __MakePeriodical__ == False:
+ title = title + ' ' + self.get_fetchformatteddate()
+ if True:
+ mi = MetaInformation(title, [self.publisher])
+ mi.publisher = self.publisher
+ mi.author_sort = self.publisher
+ if __MakePeriodical__ == True:
+ mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+ else:
+ mi.publication_type = self.publication_type+':'+self.short_title()
+ #mi.timestamp = nowf()
+ mi.timestamp = self.get_dtlocal()
+ mi.comments = self.description
+ if not isinstance(mi.comments, unicode):
+ mi.comments = mi.comments.decode('utf-8', 'replace')
+ #mi.pubdate = nowf()
+ mi.pubdate = self.get_dtlocal()
+ opf_path = os.path.join(dir, 'index.opf')
+ ncx_path = os.path.join(dir, 'index.ncx')
+ opf = OPFCreator(dir, mi)
+ # Add mastheadImage entry to section
+ mp = getattr(self, 'masthead_path', None)
+ if mp is not None and os.access(mp, os.R_OK):
+ from calibre.ebooks.metadata.opf2 import Guide
+ ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+ ref.type = 'masthead'
+ ref.title = 'Masthead Image'
+ opf.guide.append(ref)
- # Get cover
- cpath = getattr(self, 'cover_path', None)
- if cpath is None:
- pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
- if self.default_cover(pf):
- cpath = pf.name
- if cpath is not None and os.access(cpath, os.R_OK):
- opf.cover = cpath
- manifest.append(cpath)
+ manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+ manifest.append(os.path.join(dir, 'index.html'))
+ manifest.append(os.path.join(dir, 'index.ncx'))
- # Get masthead
- mpath = getattr(self, 'masthead_path', None)
- if mpath is not None and os.access(mpath, os.R_OK):
- manifest.append(mpath)
+ # Get cover
+ cpath = getattr(self, 'cover_path', None)
+ if cpath is None:
+ pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+ if self.default_cover(pf):
+ cpath = pf.name
+ if cpath is not None and os.access(cpath, os.R_OK):
+ opf.cover = cpath
+ manifest.append(cpath)
- opf.create_manifest_from_files_in(manifest)
- for mani in opf.manifest:
- if mani.path.endswith('.ncx'):
- mani.id = 'ncx'
- if mani.path.endswith('mastheadImage.jpg'):
- mani.id = 'masthead-image'
- entries = ['index.html']
- toc = TOC(base_path=dir)
- self.play_order_counter = 0
- self.play_order_map = {}
+ # Get masthead
+ mpath = getattr(self, 'masthead_path', None)
+ if mpath is not None and os.access(mpath, os.R_OK):
+ manifest.append(mpath)
- def feed_index(num, parent):
- f = feeds[num]
- for j, a in enumerate(f):
- if getattr(a, 'downloaded', False):
- adir = 'feed_%d/article_%d/'%(num, j)
- auth = a.author
- if not auth:
- auth = None
- desc = a.text_summary
- if not desc:
- desc = None
- else:
- desc = self.description_limiter(desc)
- entries.append('%sindex.html'%adir)
- po = self.play_order_map.get(entries[-1], None)
- if po is None:
- self.play_order_counter += 1
- po = self.play_order_counter
- parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+ opf.create_manifest_from_files_in(manifest)
+ for mani in opf.manifest:
+ if mani.path.endswith('.ncx'):
+ mani.id = 'ncx'
+ if mani.path.endswith('mastheadImage.jpg'):
+ mani.id = 'masthead-image'
+ entries = ['index.html']
+ toc = TOC(base_path=dir)
+ self.play_order_counter = 0
+ self.play_order_map = {}
+
+ def feed_index(num, parent):
+ f = feeds[num]
+ for j, a in enumerate(f):
+ if getattr(a, 'downloaded', False):
+ adir = 'feed_%d/article_%d/'%(num, j)
+ auth = a.author
+ if not auth:
+ auth = None
+ desc = a.text_summary
+ if not desc:
+ desc = None
+ else:
+ desc = self.description_limiter(desc)
+ entries.append('%sindex.html'%adir)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
- last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
- for sp in a.sub_pages:
- prefix = os.path.commonprefix([opf_path, sp])
- relp = sp[len(prefix):]
- entries.append(relp.replace(os.sep, '/'))
- last = sp
+ last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+ for sp in a.sub_pages:
+ prefix = os.path.commonprefix([opf_path, sp])
+ relp = sp[len(prefix):]
+ entries.append(relp.replace(os.sep, '/'))
+ last = sp
- if os.path.exists(last):
- with open(last, 'rb') as fi:
- src = fi.read().decode('utf-8')
- soup = BeautifulSoup(src)
- body = soup.find('body')
- if body is not None:
- prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
- templ = self.navbar.generate(True, num, j, len(f),
+ if os.path.exists(last):
+ with open(last, 'rb') as fi:
+ src = fi.read().decode('utf-8')
+ soup = BeautifulSoup(src)
+ body = soup.find('body')
+ if body is not None:
+ prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+ templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, self.publisher, prefix=prefix,
center=self.center_navbar)
- elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
- body.insert(len(body.contents), elem)
- with open(last, 'wb') as fi:
- fi.write(unicode(soup).encode('utf-8'))
- if len(feeds) == 0:
- raise Exception('All feeds are empty, aborting.')
+ elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+ body.insert(len(body.contents), elem)
+ with open(last, 'wb') as fi:
+ fi.write(unicode(soup).encode('utf-8'))
+ if len(feeds) == 0:
+ raise Exception('All feeds are empty, aborting.')
- if len(feeds) > 1:
- for i, f in enumerate(feeds):
- entries.append('feed_%d/index.html'%i)
- po = self.play_order_map.get(entries[-1], None)
- if po is None:
- self.play_order_counter += 1
- po = self.play_order_counter
- auth = getattr(f, 'author', None)
- if not auth:
- auth = None
- desc = getattr(f, 'description', None)
- if not desc:
- desc = None
- feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+ if len(feeds) > 1:
+ for i, f in enumerate(feeds):
+ entries.append('feed_%d/index.html'%i)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ auth = getattr(f, 'author', None)
+ if not auth:
+ auth = None
+ desc = getattr(f, 'description', None)
+ if not desc:
+ desc = None
+ feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
- else:
- entries.append('feed_%d/index.html'%0)
- feed_index(0, toc)
+ else:
+ entries.append('feed_%d/index.html'%0)
+ feed_index(0, toc)
- for i, p in enumerate(entries):
- entries[i] = os.path.join(dir, p.replace('/', os.sep))
- opf.create_spine(entries)
- opf.set_toc(toc)
+ for i, p in enumerate(entries):
+ entries[i] = os.path.join(dir, p.replace('/', os.sep))
+ opf.create_spine(entries)
+ opf.set_toc(toc)
+
+ with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+ opf.render(opf_file, ncx_file)
- with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
- opf.render(opf_file, ncx_file)
diff --git a/recipes/moldovaazi.recipe b/recipes/moldovaazi.recipe
new file mode 100644
index 0000000000..0c92297dc0
--- /dev/null
+++ b/recipes/moldovaazi.recipe
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+azi.md
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MoldovaAzi(BasicNewsRecipe):
+ title = u'Moldova Azi'
+ language = 'ro'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Moldova pe internet'
+ publisher = 'Moldova Azi'
+ category = 'Ziare,Stiri,Moldova'
+ oldest_article = 5
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ encoding = 'utf-8'
+ remove_javascript = True
+ cover_url = 'http://www.azi.md/images/logo.gif'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [ dict(name='div', attrs={'id':'in'})
+ ]
+
+
+ remove_tags = [
+ dict(name='div', attrs={'class':'in-more-stories'})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'id':'comment_wrapper'})
+ , dict(name='div', attrs={'class':'box-title4'})
+ ]
+
+ feeds = [ (u'\u0218tiri', u'http://www.azi.md/ro/feeds/0/rss201') ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/recipes/newsmoldova.recipe b/recipes/newsmoldova.recipe
new file mode 100644
index 0000000000..e5e4bc5ac4
--- /dev/null
+++ b/recipes/newsmoldova.recipe
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+newsmoldova.md
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NewsMoldova(BasicNewsRecipe):
+ title = u'Agen\u0163ia de \u015ftiri Moldova'
+ language = 'ro'
+ __author__ = u'Silviu Cotoar\u0103'
+ description = u'Agen\u0163ia de \u015ftiri Moldova'
+ publisher = 'Moldova'
+ category = 'Ziare,Stiri,Moldova'
+ oldest_article = 5
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ encoding = 'utf-8'
+ remove_javascript = True
+ cover_url = 'http://www.newsmoldova.md/i/logo_top_md.gif'
+
+ conversion_options = {
+ 'comments' : description
+ ,'tags' : category
+ ,'language' : language
+ ,'publisher' : publisher
+ }
+
+ keep_only_tags = [ dict(name='div', attrs={'class':'main-article-index article'})
+ ]
+
+
+ remove_tags = [
+ dict(name='div', attrs={'id':'actions'})
+ , dict(name='li', attrs={'class':'invisible'})
+ ]
+
+ remove_tags_after = [
+ dict(name='div', attrs={'id':'actions'})
+ ]
+
+ feeds = [ (u'\u0218tiri', u'http://newsmoldova.md/export/rss2/archive/index.xml') ]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
diff --git a/recipes/readers_digest.recipe b/recipes/readers_digest.recipe
index 3689ca4c53..caf5cf081d 100644
--- a/recipes/readers_digest.recipe
+++ b/recipes/readers_digest.recipe
@@ -3,7 +3,6 @@ __license__ = 'GPL v3'
'''
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.web.feeds import Feed
class ReadersDigest(BasicNewsRecipe):
@@ -38,151 +37,20 @@ class ReadersDigest(BasicNewsRecipe):
'''
- remove_tags = [
- dict(name='h4', attrs={'class':'close'}),
- dict(name='div', attrs={'class':'fromLine'}),
- dict(name='img', attrs={'class':'colorTag'}),
- dict(name='div', attrs={'id':'sponsorArticleHeader'}),
- dict(name='div', attrs={'class':'horizontalAd'}),
- dict(name='div', attrs={'id':'imageCounterLeft'}),
- dict(name='div', attrs={'id':'commentsPrint'})
- ]
-
-
feeds = [
- ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
- ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
- ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
- ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
+ ('Food', 'http://www.rd.com/food/feed'),
+ ('Health', 'http://www.rd.com/health/feed'),
+ ('Home', 'http://www.rd.com/home/feed'),
+ ('Family', 'http://www.rd.com/family/feed'),
+ ('Money', 'http://www.rd.com/money/feed'),
+ ('Travel', 'http://www.rd.com/travel/feed'),
]
cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
-
-
-#-------------------------------------------------------------------------------------------------
-
- def print_version(self, url):
-
- # Get the identity number of the current article and append it to the root print URL
-
- if url.find('/article') > 0:
- ident = url[url.find('/article')+8:url.find('.html?')-4]
- url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
-
- elif url.find('/post') > 0:
-
- # in this case, have to get the page itself to derive the Print page.
- soup = self.index_to_soup(url)
- newsoup = soup.find('ul',attrs={'class':'printBlock'})
- url = 'http://www.rd.com' + newsoup('a')[0]['href']
- url = url[0:url.find('&Keep')]
-
- return url
-
-#-------------------------------------------------------------------------------------------------
-
- def parse_index(self):
-
- pages = [
- ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
- # useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
- ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
-
+ keep_only_tags = dict(id='main-content')
+ remove_tags = [
+ {'class':['post-categories']},
]
- feeds = []
-
- for page in pages:
- section, url, divider, attrList = page
- newArticles = self.page_parse(url, divider, attrList)
- feeds.append((section,newArticles))
-
- # after the pages of the site have been processed, parse several RSS feeds for additional sections
- newfeeds = Feed()
- newfeeds = self.parse_rss()
-
-
- # The utility code in parse_rss returns a Feed object. Convert each feed/article combination into a form suitable
- # for this module (parse_index).
-
- for feed in newfeeds:
- newArticles = []
- for article in feed.articles:
- newArt = {
- 'title' : article.title,
- 'url' : article.url,
- 'date' : article.date,
- 'description' : article.text_summary
- }
- newArticles.append(newArt)
-
-
- # New and Blogs should be the first two feeds.
- if feed.title == 'New in RD':
- feeds.insert(0,(feed.title,newArticles))
- elif feed.title == 'Blogs':
- feeds.insert(1,(feed.title,newArticles))
- else:
- feeds.append((feed.title,newArticles))
-
-
- return feeds
-
-#-------------------------------------------------------------------------------------------------
-
- def page_parse(self, mainurl, divider, attrList):
-
- articles = []
- mainsoup = self.index_to_soup(mainurl)
- for item in mainsoup.findAll(attrs=attrList):
- newArticle = {
- 'title' : item('img')[0]['alt'],
- 'url' : 'http://www.rd.com'+item('a')[0]['href'],
- 'date' : '',
- 'description' : ''
- }
- articles.append(newArticle)
-
-
-
- return articles
-
-
-
-#-------------------------------------------------------------------------------------------------
-
- def parse_rss (self):
-
- # Do the "official" parse_feeds first
- feeds = BasicNewsRecipe.parse_feeds(self)
-
-
- # Loop thru the articles in all feeds to find articles with "recipe" in it
- recipeArticles = []
- for curfeed in feeds:
- delList = []
- for a,curarticle in enumerate(curfeed.articles):
- if curarticle.title.upper().find('RECIPE') >= 0:
- recipeArticles.append(curarticle)
- delList.append(curarticle)
- if len(delList)>0:
- for d in delList:
- index = curfeed.articles.index(d)
- curfeed.articles[index:index+1] = []
-
- # If there are any recipes found, create a new Feed object and append.
- if len(recipeArticles) > 0:
- pfeed = Feed()
- pfeed.title = 'Recipes'
- pfeed.descrition = 'Recipe Feed (Virtual)'
- pfeed.image_url = None
- pfeed.oldest_article = 30
- pfeed.id_counter = len(recipeArticles)
- # Create a new Feed, add the recipe articles, and then append
- # to "official" list of feeds
- pfeed.articles = recipeArticles[:]
- feeds.append(pfeed)
-
- return feeds
diff --git a/recipes/strategy-business.recipe b/recipes/strategy-business.recipe
index ab58965e98..a4697ecfcd 100644
--- a/recipes/strategy-business.recipe
+++ b/recipes/strategy-business.recipe
@@ -33,7 +33,7 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
elif c.name.endswith('_password'):
br[c.name] = self.password
raw = br.submit().read()
- if '>Logout' not in raw:
+ if 'You have been logged in' not in raw:
raise ValueError('Failed to login, check your username and password')
return br
diff --git a/recipes/united_daily.recipe b/recipes/united_daily.recipe
new file mode 100644
index 0000000000..6954a7e725
--- /dev/null
+++ b/recipes/united_daily.recipe
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UnitedDaily(BasicNewsRecipe):
+ title = u'聯合新聞網'
+ oldest_article = 1
+ max_articles_per_feed = 100
+
+ feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
+ (u'政治', u'http://udn.com/udnrss/politics.xml'),
+ (u'社會', u'http://udn.com/udnrss/social.xml'),
+ (u'生活', u'http://udn.com/udnrss/life.xml'),
+ (u'綜合', u'http://udn.com/udnrss/education.xml'),
+ (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
+ (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
+ (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
+ (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
+ (u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
+ (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
+ (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
+ (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
+ (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
+ (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
+ (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
+ (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
+ (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
+ (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
+ (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
+ (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
+ (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
+ (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
+ (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
+ (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
+ (u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
+ (u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
+ (u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
+ (u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
+ (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
+ (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
+ (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
+ (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
+ (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
+ (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
+ (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
+ (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
+ (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
+ ]
+
+ extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''
+
+ __author__ = 'Eddie Lau'
+ __version__ = '1.0'
+ language = 'zh'
+ publisher = 'United Daily News Group'
+ description = 'United Daily (Taiwan)'
+ category = 'News, Chinese, Taiwan'
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ encoding = 'big5'
+ conversion_options = {'linearize_tables':True}
+ masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
+ cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
+ keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
+ remove_tags = [dict(name='div', attrs={'id':['mvouter']})]
diff --git a/setup/commands.py b/setup/commands.py
index 7e22ff14f3..febc684c08 100644
--- a/setup/commands.py
+++ b/setup/commands.py
@@ -11,7 +11,7 @@ __all__ = [
'build', 'build_pdf2xml', 'server',
'gui',
'develop', 'install',
- 'resources',
+ 'kakasi', 'resources',
'check',
'sdist',
'manual', 'tag_release',
@@ -49,8 +49,9 @@ gui = GUI()
from setup.check import Check
check = Check()
-from setup.resources import Resources
+from setup.resources import Resources, Kakasi
resources = Resources()
+kakasi = Kakasi()
from setup.publish import Manual, TagRelease, Stage1, Stage2, \
Stage3, Stage4, Publish
diff --git a/setup/installer/windows/__init__.py b/setup/installer/windows/__init__.py
index b51eccc832..59042ac56c 100644
--- a/setup/installer/windows/__init__.py
+++ b/setup/installer/windows/__init__.py
@@ -32,6 +32,7 @@ class Win32(VMInstaller):
FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
INSTALLER_EXT = 'msi'
SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
+ BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD
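+    # run the new 'setup.py kakasi' resource-generation step before the standard build commands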
def download_installer(self):
installer = self.installer()
diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py
index f666427598..7fb60968e7 100644
--- a/setup/installer/windows/freeze.py
+++ b/setup/installer/windows/freeze.py
@@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
OPENSSL_DIR = r'Q:\openssl'
-QT_DIR = 'Q:\\Qt\\4.7.2'
+QT_DIR = 'Q:\\Qt\\4.7.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
diff --git a/setup/installer/windows/wix-template.xml b/setup/installer/windows/wix-template.xml
index b5d2f4b292..5de08e155f 100644
--- a/setup/installer/windows/wix-template.xml
+++ b/setup/installer/windows/wix-template.xml
@@ -11,9 +11,6 @@
SummaryCodepage='1252' />
-
-
ci
'b' : ('ci', 'bold______', self.bool_st_func),
'blue' : ('ci', 'blue______', self.color_func),
- 'caps' : ('ci', 'caps______', self.bool_st_func),
- 'cf' : ('ci', 'font-color', self.default_func),
+ 'caps' : ('ci', 'caps______', self.bool_st_func),
+ 'cf' : ('ci', 'font-color', self.colorz_func),
'chftn' : ('ci', 'footnot-mk', self.bool_st_func),
'dn' : ('ci', 'font-down_', self.divide_by_2),
'embo' : ('ci', 'emboss____', self.bool_st_func),
@@ -624,6 +624,11 @@ class ProcessTokens:
num = 'true'
return 'cw<%s<%s'
+__docformat__ = 'restructuredtext en'
+
+import urllib2
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.basic_config import BasicStoreConfig
+from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.web_store_dialog import WebStoreDialog
+
+class BeamEBooksDEStore(BasicStoreConfig, StorePlugin):
+
+ def open(self, parent=None, detail_item=None, external=False):
+ url = 'http://klick.affiliwelt.net/klick.php?bannerid=10072&pid=32307&prid=908'
+ url_details = ('http://klick.affiliwelt.net/klick.php?'
+ 'bannerid=10730&pid=32307&prid=908&prodid={0}')
+
+ if external or self.config.get('open_external', False):
+ if detail_item:
+ url = url_details.format(detail_item)
+ open_url(QUrl(url))
+ else:
+ detail_url = None
+ if detail_item:
+ detail_url = url_details.format(detail_item)
+ d = WebStoreDialog(self.gui, url, parent, detail_url)
+ d.setWindowTitle(self.name)
+ d.set_tags(self.config.get('tags', ''))
+ d.exec_()
+
+ def search(self, query, max_results=10, timeout=60):
+ url = 'http://www.beam-ebooks.de/suchergebnis.php?Type=&sw=' + urllib2.quote(query)
+ br = browser()
+
+ counter = max_results
+ with closing(br.open(url, timeout=timeout)) as f:
+ doc = html.fromstring(f.read())
+ for data in doc.xpath('//table[tr/td/div[@class="stil2"]]'):
+ if counter <= 0:
+ break
+
+ id = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/@href')).strip()
+ if not id:
+ continue
+ id = id[7:]
+ cover_url = ''.join(data.xpath('./tr/td[1]/a/img/@src'))
+ if cover_url:
+ cover_url = 'http://www.beam-ebooks.de' + cover_url
+ title = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/b/text()'))
+ author = ' '.join(data.xpath('./tr/td/div[@class="stil2"]/'
+ 'child::b/text()'
+ '|'
+ './tr/td/div[@class="stil2"]/'
+ 'child::strong/text()'))
+ price = ''.join(data.xpath('./tr/td[3]/text()'))
+ pdf = data.xpath(
+ 'boolean(./tr/td[3]/a/img[contains(@alt, "PDF")]/@alt)')
+ epub = data.xpath(
+ 'boolean(./tr/td[3]/a/img[contains(@alt, "ePub")]/@alt)')
+ mobi = data.xpath(
+ 'boolean(./tr/td[3]/a/img[contains(@alt, "Mobipocket")]/@alt)')
+ counter -= 1
+
+ s = SearchResult()
+ s.cover_url = cover_url
+ s.title = title.strip()
+ s.author = author.strip()
+ s.price = price
+ s.drm = SearchResult.DRM_UNLOCKED
+ s.detail_item = id
+ formats = []
+ if epub:
+ formats.append('ePub')
+ if pdf:
+ formats.append('PDF')
+ if mobi:
+ formats.append('MOBI')
+ s.formats = ', '.join(formats)
+
+ yield s
diff --git a/src/calibre/gui2/store/bn_plugin.py b/src/calibre/gui2/store/bn_plugin.py
index f26a60c89d..62826e825d 100644
--- a/src/calibre/gui2/store/bn_plugin.py
+++ b/src/calibre/gui2/store/bn_plugin.py
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import random
import re
-import urllib2
+import urllib
from contextlib import closing
from lxml import html
@@ -48,7 +48,7 @@ class BNStore(BasicStoreConfig, StorePlugin):
def search(self, query, max_results=10, timeout=60):
url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results
- url += urllib2.quote(query)
+ url += urllib.quote_plus(query)
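+        # urllib.quote_plus encodes spaces as '+' (form/query-string encoding), unlike urllib2.quote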
br = browser()
diff --git a/src/calibre/gui2/store/declined.txt b/src/calibre/gui2/store/declined.txt
new file mode 100644
index 0000000000..2b0e5caed2
--- /dev/null
+++ b/src/calibre/gui2/store/declined.txt
@@ -0,0 +1,5 @@
+This is a list of stores that objected, declined
+or asked not to be included in the store integration.
+
+* Borders (http://www.borders.com/)
+* WH Smith (http://www.whsmith.co.uk/)
diff --git a/src/calibre/gui2/store/epubbuy_de_plugin.py b/src/calibre/gui2/store/epubbuy_de_plugin.py
new file mode 100644
index 0000000000..242ef76793
--- /dev/null
+++ b/src/calibre/gui2/store/epubbuy_de_plugin.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import urllib2
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.basic_config import BasicStoreConfig
+from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.web_store_dialog import WebStoreDialog
+
+class EPubBuyDEStore(BasicStoreConfig, StorePlugin):
+
+ def open(self, parent=None, detail_item=None, external=False):
+ url = 'http://klick.affiliwelt.net/klick.php?bannerid=47653&pid=32307&prid=2627'
+ url_details = ('http://klick.affiliwelt.net/klick.php?bannerid=47653'
+ '&pid=32307&prid=2627&prodid={0}')
+
+ if external or self.config.get('open_external', False):
+ if detail_item:
+ url = url_details.format(detail_item)
+ open_url(QUrl(url))
+ else:
+ detail_url = None
+ if detail_item:
+ detail_url = url_details.format(detail_item)
+ d = WebStoreDialog(self.gui, url, parent, detail_url)
+ d.setWindowTitle(self.name)
+ d.set_tags(self.config.get('tags', ''))
+ d.exec_()
+
+ def search(self, query, max_results=10, timeout=60):
+ url = 'http://www.epubbuy.com/search.php?search_query=' + urllib2.quote(query)
+ br = browser()
+
+ counter = max_results
+ with closing(br.open(url, timeout=timeout)) as f:
+ doc = html.fromstring(f.read())
+ for data in doc.xpath('//li[contains(@class, "ajax_block_product")]'):
+ if counter <= 0:
+ break
+
+ id = ''.join(data.xpath('./div[@class="center_block"]'
+ '/p[contains(text(), "artnr:")]/text()')).strip()
+ if not id:
+ continue
+ id = id[6:].strip()
+ if not id:
+ continue
+ cover_url = ''.join(data.xpath('./div[@class="center_block"]'
+ '/a[@class="product_img_link"]/img/@src'))
+ if cover_url:
+ cover_url = 'http://www.epubbuy.com' + cover_url
+ title = ''.join(data.xpath('./div[@class="center_block"]'
+ '/a[@class="product_img_link"]/@title'))
+ author = ''.join(data.xpath('./div[@class="center_block"]/a[2]/text()'))
+ price = ''.join(data.xpath('.//span[@class="price"]/text()'))
+ counter -= 1
+
+ s = SearchResult()
+ s.cover_url = cover_url
+ s.title = title.strip()
+ s.author = author.strip()
+ s.price = price
+ s.drm = SearchResult.DRM_UNLOCKED
+ s.detail_item = id
+ s.formats = 'ePub'
+
+ yield s
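
Note on the 'artnr:' parsing in the search() above: the product id is pulled from a paragraph whose text begins with the literal prefix 'artnr:', and id[6:] drops exactly those six characters before stripping whitespace. A minimal sketch, not part of the patch, with an invented sample string:

    raw = 'artnr: 81047 '
    art_id = raw[6:].strip()   # drop the 6-character 'artnr:' prefix, keep the number
    print(art_id)              # -> '81047'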
diff --git a/src/calibre/gui2/store/foyles_uk_plugin.py b/src/calibre/gui2/store/foyles_uk_plugin.py
index ca35fb6bb2..1a997cd671 100644
--- a/src/calibre/gui2/store/foyles_uk_plugin.py
+++ b/src/calibre/gui2/store/foyles_uk_plugin.py
@@ -73,6 +73,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
s.price = price
s.detail_item = id
s.drm = SearchResult.DRM_LOCKED
- s.formats = 'EPUB'
+ s.formats = 'ePub'
yield s
diff --git a/src/calibre/gui2/store/search/download_thread.py b/src/calibre/gui2/store/search/download_thread.py
index 6dd59cc5a7..97279d7773 100644
--- a/src/calibre/gui2/store/search/download_thread.py
+++ b/src/calibre/gui2/store/search/download_thread.py
@@ -12,6 +12,7 @@ from threading import Thread
from Queue import Queue
from calibre import browser
+from calibre.constants import DEBUG
from calibre.utils.magick.draw import thumbnail
class GenericDownloadThreadPool(object):
@@ -119,7 +120,8 @@ class SearchThread(Thread):
self.results.put((res, store_plugin))
self.tasks.task_done()
except:
- traceback.print_exc()
+ if DEBUG:
+ traceback.print_exc()
class CoverThreadPool(GenericDownloadThreadPool):
@@ -157,7 +159,8 @@ class CoverThread(Thread):
callback()
self.tasks.task_done()
except:
- continue
+ if DEBUG:
+ traceback.print_exc()
class DetailsThreadPool(GenericDownloadThreadPool):
@@ -191,7 +194,8 @@ class DetailsThread(Thread):
callback(result)
self.tasks.task_done()
except:
- continue
+ if DEBUG:
+ traceback.print_exc()
class CacheUpdateThreadPool(GenericDownloadThreadPool):
@@ -221,4 +225,5 @@ class CacheUpdateThread(Thread):
store_plugin, timeout = self.tasks.get()
store_plugin.update_cache(timeout=timeout, suppress_progress=True)
except:
- traceback.print_exc()
+ if DEBUG:
+ traceback.print_exc()
diff --git a/src/calibre/gui2/store/search/search.py b/src/calibre/gui2/store/search/search.py
index 07d4afca54..eea1a692de 100644
--- a/src/calibre/gui2/store/search/search.py
+++ b/src/calibre/gui2/store/search/search.py
@@ -23,8 +23,8 @@ TIMEOUT = 75 # seconds
class SearchDialog(QDialog, Ui_Dialog):
- def __init__(self, istores, *args):
- QDialog.__init__(self, *args)
+ def __init__(self, istores, parent=None, query=''):
+ QDialog.__init__(self, parent)
self.setupUi(self)
self.config = JSONConfig('store/search')
@@ -47,13 +47,16 @@ class SearchDialog(QDialog, Ui_Dialog):
# per search basis.
stores_group_layout = QVBoxLayout()
self.stores_group.setLayout(stores_group_layout)
- for x in self.store_plugins:
+ for x in sorted(self.store_plugins.keys(), key=lambda x: x.lower()):
cbox = QCheckBox(x)
cbox.setChecked(True)
stores_group_layout.addWidget(cbox)
setattr(self, 'store_check_' + x, cbox)
stores_group_layout.addStretch()
+ # Set the search query
+ self.search_edit.setText(query)
+
# Create and add the progress indicator
self.pi = ProgressIndicator(self, 24)
self.top_layout.addWidget(self.pi)
@@ -93,7 +96,7 @@ class SearchDialog(QDialog, Ui_Dialog):
# Store / Formats
self.results_view.setColumnWidth(4, int(total*.25))
- def do_search(self, checked=False):
+ def do_search(self):
# Stop all running threads.
self.checker.stop()
self.search_pool.abort()
@@ -136,14 +139,17 @@ class SearchDialog(QDialog, Ui_Dialog):
query = query.replace('>', '')
query = query.replace('<', '')
# Remove the prefix.
- for loc in ( 'all', 'author', 'authors', 'title'):
-            query = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', query)
+ for loc in ('all', 'author', 'authors', 'title'):
+            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
+ query = query.replace('%s:' % loc, '')
# Remove the prefix and search text.
for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
query = re.sub(r'%s:"[^"]"' % loc, '', query)
query = re.sub(r'%s:[^\s]*' % loc, '', query)
# Remove logic.
- query = re.sub(r'(^|\s)(and|not|or)(\s|$)', ' ', query)
+ query = re.sub(r'(^|\s)(and|not|or|a|the|is|of)(\s|$)', ' ', query)
+ # Remove "
+ query = query.replace('"', '')
# Remove excess whitespace.
query = re.sub(r'\s{2,}', ' ', query)
query = query.strip()
@@ -252,4 +258,9 @@ class SearchDialog(QDialog, Ui_Dialog):
self.search_pool.abort()
self.cache_pool.abort()
self.save_state()
+
+ def exec_(self):
+ if unicode(self.search_edit.text()).strip():
+ self.do_search()
+ return QDialog.exec_(self)
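
For reference, the tightened query cleaning above behaves roughly as follows. This is a standalone sketch that replays the same substitutions outside the dialog; the helper name and the sample query are invented for illustration:

    import re

    def clean(query):
        # Keep the search text for author/title style prefixes, drop the prefix itself.
        for loc in ('all', 'author', 'authors', 'title'):
            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, r'\g<a>', query)
            query = query.replace('%s:' % loc, '')
        # Drop prefix and value for store-specific fields.
        for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
            query = re.sub(r'%s:"[^"]"' % loc, '', query)
            query = re.sub(r'%s:[^\s]*' % loc, '', query)
        # Drop logic words and stray quotes, then collapse whitespace.
        query = re.sub(r'(^|\s)(and|not|or|a|the|is|of)(\s|$)', ' ', query)
        query = query.replace('"', '')
        query = re.sub(r'\s{2,}', ' ', query)
        return query.strip()

    print(clean('author:"tolkien" format:epub the hobbit'))  # -> 'tolkien hobbit'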
diff --git a/src/calibre/gui2/store/waterstones_uk_plugin.py b/src/calibre/gui2/store/waterstones_uk_plugin.py
index d422165c47..a5065128ba 100644
--- a/src/calibre/gui2/store/waterstones_uk_plugin.py
+++ b/src/calibre/gui2/store/waterstones_uk_plugin.py
@@ -76,7 +76,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
s.detail_item = id
formats = []
if epub:
- formats.append('EPUB')
+ formats.append('ePub')
if pdf:
formats.append('PDF')
s.formats = ', '.join(formats)
diff --git a/src/calibre/gui2/store/weightless_books_plugin.py b/src/calibre/gui2/store/weightless_books_plugin.py
new file mode 100644
index 0000000000..3fa1c76851
--- /dev/null
+++ b/src/calibre/gui2/store/weightless_books_plugin.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import urllib
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser, url_slash_cleaner
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.basic_config import BasicStoreConfig
+from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.web_store_dialog import WebStoreDialog
+
+class WeightlessBooksStore(BasicStoreConfig, StorePlugin):
+
+ def open(self, parent=None, detail_item=None, external=False):
+ url = 'http://weightlessbooks.com/'
+
+ if external or self.config.get('open_external', False):
+ open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
+ else:
+ d = WebStoreDialog(self.gui, url, parent, detail_item)
+ d.setWindowTitle(self.name)
+ d.set_tags(self.config.get('tags', ''))
+ d.exec_()
+
+ def search(self, query, max_results=10, timeout=60):
+ url = 'http://weightlessbooks.com/?s=' + urllib.quote_plus(query)
+
+ br = browser()
+
+ counter = max_results
+ with closing(br.open(url, timeout=timeout)) as f:
+ doc = html.fromstring(f.read())
+ for data in doc.xpath('//li[@id="product"]'):
+ if counter <= 0:
+ break
+
+ id = ''.join(data.xpath('.//div[@class="cover"]/a/@href'))
+ if not id:
+ continue
+
+ cover_url = ''.join(data.xpath('.//div[@class="cover"]/a/img/@src'))
+
+ price = ''.join(data.xpath('.//div[@class="buy_buttons"]/b[1]/text()'))
+ if not price:
+ continue
+
+ formats = ', '.join(data.xpath('.//select[@class="eStore_variation"]//option//text()'))
+ formats = formats.upper()
+
+ title = ''.join(data.xpath('.//h3/a/text()'))
+ author = ''.join(data.xpath('.//h3//text()'))
+ author = author.replace(title, '')
+
+ counter -= 1
+
+ s = SearchResult()
+ s.cover_url = cover_url
+ s.title = title.strip()
+ s.author = author.strip()
+ s.price = price.strip()
+ s.detail_item = id.strip()
+ s.drm = SearchResult.DRM_UNLOCKED
+ s.formats = formats
+
+ yield s
diff --git a/src/calibre/gui2/store/wizards_tower_books_plugin.py b/src/calibre/gui2/store/wizards_tower_books_plugin.py
new file mode 100644
index 0000000000..56bb00ff7e
--- /dev/null
+++ b/src/calibre/gui2/store/wizards_tower_books_plugin.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import urllib
+from contextlib import closing
+
+from lxml import html
+
+from PyQt4.Qt import QUrl
+
+from calibre import browser, url_slash_cleaner
+from calibre.gui2 import open_url
+from calibre.gui2.store import StorePlugin
+from calibre.gui2.store.basic_config import BasicStoreConfig
+from calibre.gui2.store.search_result import SearchResult
+from calibre.gui2.store.web_store_dialog import WebStoreDialog
+
+class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
+
+ url = 'http://www.wizardstowerbooks.com/'
+
+ def open(self, parent=None, detail_item=None, external=False):
+ if detail_item:
+ detail_item = self.url + detail_item
+
+ if external or self.config.get('open_external', False):
+            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else self.url)))
+ else:
+ d = WebStoreDialog(self.gui, self.url, parent, detail_item)
+ d.setWindowTitle(self.name)
+ d.set_tags(self.config.get('tags', ''))
+ d.exec_()
+
+ def search(self, query, max_results=10, timeout=60):
+ url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)
+
+ br = browser()
+
+ counter = max_results
+ with closing(br.open(url, timeout=timeout)) as f:
+ doc = html.fromstring(f.read())
+ for data in doc.xpath('//table[@class="gridp"]//td'):
+ if counter <= 0:
+ break
+
+ id = ''.join(data.xpath('.//span[@class="prti"]/a/@href'))
+ id = id.strip()
+ if not id:
+ continue
+
+ cover_url = ''.join(data.xpath('.//div[@class="prim"]/a/img/@src'))
+ cover_url = url_slash_cleaner(self.url + cover_url.strip())
+
+ price = ''.join(data.xpath('.//font[@class="selling_price"]//text()'))
+ price = price.strip()
+ if not price:
+ continue
+
+ title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
+ author = ''.join(data.xpath('.//p[@class="last"]/text()'))
+ a, b, author = author.partition(' by ')
+
+ counter -= 1
+
+ s = SearchResult()
+ s.cover_url = cover_url
+ s.title = title.strip()
+ s.author = author.strip()
+ s.price = price.strip()
+ s.detail_item = id.strip()
+ s.drm = SearchResult.DRM_UNLOCKED
+
+ yield s
+
+ def get_details(self, search_result, timeout):
+ br = browser()
+ with closing(br.open(url_slash_cleaner(self.url + search_result.detail_item), timeout=timeout)) as nf:
+ idata = html.fromstring(nf.read())
+
+ formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
+ search_result.formats = formats.upper()
+
+ return True
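
A small aside on the author parsing in search() above: str.partition(' by ') splits strings of the form 'Title by Author', and when the separator is absent everything stays in the first slot, so the extracted author falls back to an empty string rather than raising. Sketch with made-up sample strings:

    _, _, author = 'The Last Unicorn by Peter S. Beagle'.partition(' by ')
    print(author)          # -> 'Peter S. Beagle'

    _, _, author = 'Anthology 2011'.partition(' by ')
    print(repr(author))    # -> ''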
diff --git a/src/calibre/gui2/wizard/__init__.py b/src/calibre/gui2/wizard/__init__.py
index a32347dc72..6b1a793fc8 100644
--- a/src/calibre/gui2/wizard/__init__.py
+++ b/src/calibre/gui2/wizard/__init__.py
@@ -435,7 +435,7 @@ class DevicePage(QWizardPage, DeviceUI):
self.registerField("device", self.device_view)
def initializePage(self):
- self.label.setText(_('Choose you e-book device. If your device is'
+ self.label.setText(_('Choose your e-book device. If your device is'
' not in the list, choose a "%s" device.')%Device.manufacturer)
self.man_model = ManufacturerModel()
self.manufacturer_view.setModel(self.man_model)
diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py
index a04aa48bf7..b1dbebb775 100644
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@@ -1415,18 +1415,34 @@ ol, ul { padding-left: 2em; }
self.writedata()
c = attrs.get( (TEXTNS,'style-name'), None)
htmlattrs = {}
+        # Changed by Kovid to handle inline special styles defined on <text:span> tags.
+ # Apparently LibreOffice does this.
+ special = 'span'
if c:
c = c.replace(".","_")
special = special_styles.get("S-"+c)
- if special is None and self.generate_css:
- htmlattrs['class'] = "S-%s" % c
- self.opentag('span', htmlattrs)
+ if special is None:
+ special = 'span'
+ if self.generate_css:
+ htmlattrs['class'] = "S-%s" % c
+
+ self.opentag(special, htmlattrs)
self.purgedata()
def e_text_span(self, tag, attrs):
""" End the <text:span> """
self.writedata()
- self.closetag('span', False)
+ c = attrs.get( (TEXTNS,'style-name'), None)
+        # Changed by Kovid to handle inline special styles defined on <text:span> tags.
+ # Apparently LibreOffice does this.
+ special = 'span'
+ if c:
+ c = c.replace(".","_")
+ special = special_styles.get("S-"+c)
+ if special is None:
+ special = 'span'
+
+ self.closetag(special, False)
self.purgedata()
def s_text_tab(self, tag, attrs):