merge from trunk
@@ -30,3 +30,4 @@ nbproject/
.project
.pydevproject
.settings/
*.DS_Store
@@ -19,6 +19,90 @@
# new recipes:
# - title:

- version: 0.8.1
  date: 2011-05-13

  new features:
    - title: "Add Amazon DE, Beam EBooks, Beam DE, Weightless Books, Wizards Tower Books to the list of ebook stores searched by Get Books"

    - title: "TXT output: All new Textile output with much greater preservation of formatting from the input document"

    - title: "Migrate metadata plugin for Douban Books to the 0.8 API"

    - title: "Driver for Dell Streak on Windows"

    - title: "Add menu items to Get Books action to search by title and author of current book"

    - title: "Add title_sort as available field to CSV/XML catalogs"

    - title: "Add a context menu to the manage authors dialog"

    - title: "Add a button to paste an ISBN into the identifiers field in the edit metadata dialog automatically"

  bug fixes:
    - title: "Amazon metadata download plugin: Fix links being stripped from comments. Also fix ratings/ISBN not being parsed from Kindle edition pages."
      tickets: [782012]

    - title: "Fix one source of segfaults on shutdown in the Linux binary builds."

    - title: "Allow the use of condensed/expanded fonts as interface fonts"

    - title: "EPUB Input: Ignore missing cover file when converting, instead of erroring out."
      tickets: [781848]

    - title: "Fix custom identifier being erased by metadata download"
      tickets: [781759]

    - title: "Fix regression that broke various things when using Japanese language calibre on Windows"
      tickets: [780804]

    - title: "RTF Input: Handle null color codes correctly"
      tickets: [780728]

    - title: "ODT Input: Handle inline special styles defined on <text:span> tags."
      tickets: [780250]

    - title: "Fix error when pressing the next/previous button with an empty search in the Plugins preferences"
      tickets: [781135]

    - title: "Ignore 'Unknown' author when downloading metadata."
      tickets: [779348]

    - title: "Fix timezone bug when setting dates in the edit metadata dialog"
      tickets: [779497]

    - title: "Fix ebook-convert not recognizing output paths starting with .."
      tickets: [779322]

  improved recipes:
    - "Strategy+Business"
    - Readers Digest
    - Ming Pao
    - Telepolis
    - Fronda
    - Rzeczpospolita

  new recipes:
    - title: "Various Taiwanese news sources"
      author: Eddie Lau

    - title: Replica Vedetelor, Ziua Veche
      author: Silviu Cotoara

    - title: Welt der Physik
      author: schuster

    - title: Korea Herald
      author: Seongkyoun Yoo

- version: 0.8.0
  date: 2011-05-06

  new features:
    - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
      type: major

- version: 0.7.59
  date: 2011-04-30

@@ -93,7 +93,7 @@ class Arcamax(BasicNewsRecipe):
        for page in pages:
            page_soup = self.index_to_soup(url)
            if page_soup:
                title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
                title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
                page_url = url
                # orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
                prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
@@ -127,4 +127,3 @@ class Arcamax(BasicNewsRecipe):
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

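In the Arcamax hunk above, the title extraction is wrapped in self.tag_to_string(). As I read it, the point is normalization: .contents[0] yields a BeautifulSoup node (a NavigableString, or a tag when the heading contains markup), while tag_to_string() reduces either to plain text. A minimal sketch, assuming BeautifulSoup 3 as bundled with calibre at the time; the sample markup is hypothetical:

# Hypothetical markup shaped like Arcamax's comics-header block.
from calibre.ebooks.BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<div class="comics-header"><h1><b>Garfield</b></h1></div>')
node = soup.find('div', attrs={'class':'comics-header'}).h1.contents[0]
# node is the <b> tag here, not a plain string; rendering it directly keeps markup.
# tag_to_string(node) (a BasicNewsRecipe helper) collapses it to u'Garfield',
# which is why the recipe now wraps the extraction in self.tag_to_string(...).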
=== added file 'recipes/china_times.recipe' (42 lines)
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'中時電子報'
    oldest_article = 1
    max_articles_per_feed = 100

    feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),
             (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),
             (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),
             (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),
             (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),
             (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),
             (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),
             (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),
             (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),
             (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),
             #(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'),
             #(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links
             #(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss')    # broken links
             ]

    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'China Times Group'
    description = 'China Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})]
    remove_tags = [dict(name='div', attrs={'class':['focus-news']})]
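Three of the feeds above are commented out, two of them flagged as broken links. A quick way to re-check them before re-enabling, sketched with the Python 2 stdlib calibre used at the time (the 10-second timeout is an arbitrary choice):

# Sketch: probe the commented-out China Times feeds before re-enabling them.
import urllib2

candidates = [
    'http://rss.chinatimes.com/rss/want-u.rss',
    'http://rss.chinatimes.com/rss/finance-u.rss',  # flagged "# broken links" above
    'http://rss.chinatimes.com/rss/stock-u.rss',    # flagged "# broken links" above
]

for url in candidates:
    try:
        code = urllib2.urlopen(url, timeout=10).getcode()
        print url, '->', code
    except Exception, e:
        print url, '-> failed:', e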
=== added file 'recipes/divahair.recipe' (53 lines)
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
divahair.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DivaHair(BasicNewsRecipe):
    title = u'Diva Hair'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Coafuri, frizuri, tunsori ..'
    publisher = u'Diva Hair'
    category = u'Ziare,Stiri,Coafuri,Femei'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.divahair.ro/imgs/logo.jpg'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [
                       dict(name='td', attrs={'class':'spatiuart'})
                     , dict(name='div', attrs={'class':'spatiuart'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'class':'categorie'})
                  , dict(name='div', attrs={'class':'gri gri2 detaliiart'})
                  , dict(name='div', attrs={'class':'articol_box_bottom'})
                  ]

    remove_tags_after = [
                          dict(name='div', attrs={'class':'articol_box_bottom'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://www.divahair.ro/feed') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
=== added file 'recipes/financialsense.recipe' (64 lines)
@@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.financialsense.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class FinancialSense(BasicNewsRecipe):
    title                 = 'Financial Sense'
    __author__            = 'Darko Miletic'
    description           = 'Uncommon News & Views for the Wise Investor'
    publisher             = 'Financial Sense'
    category              = 'news, finances, politics, USA'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'en'
    remove_empty_feeds    = True
    publication_type      = 'newsportal'
    masthead_url          = 'http://www.financialsense.com/sites/default/files/logo.jpg'
    extra_css             = """
                               body{font-family: Arial,"Helvetica Neue",Helvetica,sans-serif }
                               img{margin-bottom: 0.4em; display:block}
                               h2{color: gray}
                               .name{margin-right: 5em}
                            """

    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    remove_tags       = [dict(name=['meta','link','base','object','embed','iframe'])]
    remove_tags_after = dict(attrs={'class':'vcard'})
    keep_only_tags    = [dict(attrs={'class':['title','post-meta','content','item-title','vcard']})]
    remove_attributes = ['lang','type']

    feeds = [(u'Articles', u'http://feeds.feedburner.com/fso')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
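The preprocess_html above unwraps every <a>: a text-only anchor is replaced by its text, an anchor wrapping an image is demoted to a bare <div>, and anything else is flattened with tag_to_string. The same logic re-sketched without shadowing the built-in str (a reading of the recipe's intent, not a drop-in replacement):

# The anchor-flattening idiom from preprocess_html above, with the
# built-in name 'str' left unshadowed; behaviour is meant to be identical.
def flatten_links(self, soup):
    for anchor in soup.findAll('a'):
        img = anchor.find('img')
        if anchor.string is not None:
            anchor.replaceWith(anchor.string)               # plain text link
        elif img:
            anchor.name = 'div'                             # keep image, drop link
            anchor.attrs = []
        else:
            anchor.replaceWith(self.tag_to_string(anchor))  # mixed content
    return soup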
@@ -21,14 +21,19 @@ class Fronda(BasicNewsRecipe):

    feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')]

    keep_only_tags = [dict(name='h1', attrs={'class':'big'}),
                      dict(name='ul', attrs={'class':'about clear'}),
                      dict(name='div', attrs={'class':'content'})]
    keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}),
                      dict(name='div', attrs={'class':'naglowek_tresc'}),
                      dict(name='div', attrs={'id':'czytaj'}) ]

    remove_tags = [dict(name='a', attrs={'class':'print'})]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [ (r'<a href="#" class="print">Drukuj</a>', lambda match: ''),
          (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
        [ (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
          (r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
          (r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
          (r'<h3>Zobacz t.*?</div>', lambda match: '</div>') ]
          (r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
          (r'<p[^>]*> </p>', lambda match: ''),
          (r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
          (r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
        ]
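The Fronda entries above follow calibre's preprocess_regexps convention: a list of (pattern, substitution) pairs, each compiled with re.IGNORECASE | re.DOTALL and applied to the raw page source before parsing. A minimal sketch of that mechanism outside a recipe (the sample HTML is hypothetical):

# Sketch of how preprocess_regexps entries are built and applied.
import re

preprocess_regexps = [
    (re.compile(pat, re.IGNORECASE | re.DOTALL), repl) for pat, repl in
    [ (r'<a href="#" class="print">Drukuj</a>', lambda match: ''),
      (r'<p[^>]*>\s*</p>', lambda match: '') ]
]

html = '<p>Tekst</p><a href="#" class="print">Drukuj</a><p> </p>'
for pattern, repl in preprocess_regexps:
    html = pattern.sub(repl, html)
# html is now '<p>Tekst</p>'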
=== added file 'recipes/icons/divahair.png' (binary, 675 B)
=== added file 'recipes/icons/financialsense.png' (binary, 702 B)
=== added file 'recipes/icons/iprofesional.png' (binary, 1.1 KiB)
=== added file 'recipes/icons/mayra.png' (binary, 620 B)
=== added file 'recipes/icons/moldovaazi.png' (binary, 243 B)
=== added file 'recipes/icons/newsmoldova.png' (binary, 837 B)
=== added file 'recipes/icons/replicavedetelor.png' (binary, 709 B)
=== added file 'recipes/icons/rzeczpospolita.png' (binary, 1.2 KiB)
=== added file 'recipes/icons/ziuaveche.png' (binary, 554 B)
=== added file 'recipes/iprofesional.recipe' (79 lines)
@@ -0,0 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.iprofesional.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class iProfesional(BasicNewsRecipe):
    title                 = 'iProfesional.com'
    __author__            = 'Darko Miletic'
    description           = 'Las ultimas noticias sobre profesionales'
    publisher             = 'Emprendimientos Corporativos S.A.'
    category              = 'news, IT, impuestos, negocios, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    language              = 'es_AR'
    remove_empty_feeds    = True
    publication_type      = 'newsportal'
    masthead_url          = 'http://www.iprofesional.com/img/logo-iprofesional.png'
    extra_css             = """
                               body{font-family: Arial,Helvetica,sans-serif }
                               img{margin-bottom: 0.4em; display:block}
                               .titulo-interior{font-family: Georgia,"Times New Roman",Times,serif}
                               .autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray}
                            """

    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    keep_only_tags = [dict(attrs={'class':['fecha','interior-nota']})]

    remove_tags = [
                    dict(name=['meta','link','base','embed','object','iframe'])
                   ,dict(attrs={'class':['menu-imprimir','guardarNota','IN-widget','fin','permalink']})
                  ]
    remove_attributes = ['lang','xmlns:og','xmlns:fb']

    feeds = [
              (u'Ultimas noticias'  , u'http://feeds.feedburner.com/iprofesional-principales-noticias')
             ,(u'Finanzas'          , u'http://feeds.feedburner.com/iprofesional-finanzas')
             ,(u'Impuestos'         , u'http://feeds.feedburner.com/iprofesional-impuestos')
             ,(u'Negocios'          , u'http://feeds.feedburner.com/iprofesional-economia')
             ,(u'Comercio Exterior' , u'http://feeds.feedburner.com/iprofesional-comercio-exterior')
             ,(u'Tecnologia'        , u'http://feeds.feedburner.com/iprofesional-tecnologia')
             ,(u'Management'        , u'http://feeds.feedburner.com/iprofesional-managment')
             ,(u'Marketing'         , u'http://feeds.feedburner.com/iprofesional-marketing')
             ,(u'Legales'           , u'http://feeds.feedburner.com/iprofesional-legales')
             ,(u'Autos'             , u'http://feeds.feedburner.com/iprofesional-autos')
             ,(u'Vinos'             , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas')
            ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
=== added file 'recipes/korea_herald.recipe' (36 lines)
@@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
'''
Profile to download KoreaHerald
'''
from calibre.web.feeds.news import BasicNewsRecipe

class KoreaHerald(BasicNewsRecipe):
    title       = u'KoreaHerald'
    language    = 'en'
    description = u'Korea Herald News articles'
    __author__  = 'Seongkyoun Yoo'
    oldest_article = 10
    recursions = 3
    max_articles_per_feed = 10
    no_stylesheets = True
    keep_only_tags = [
        dict(id=['contentLeft', '_article'])
    ]

    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
    ]

    feeds = [
        ('All News','http://www.koreaherald.com/rss/020000000000.xml'),
        ('National','http://www.koreaherald.com/rss/020100000000.xml'),
        ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
        ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
        ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
        ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
        ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
        ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
    ]
=== added file 'recipes/liberty_times.recipe' (44 lines)
@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'自由電子報'
    oldest_article = 1
    max_articles_per_feed = 100

    feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),
             (u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),
             (u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),
             (u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),
             (u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),
             (u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),
             (u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),
             (u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),
             (u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),
             (u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),
             (u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),
             (u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),
             (u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),
             (u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),
             (u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),
            ]
    extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.1'
    language = 'zh'
    publisher = 'Liberty Times Group'
    description = 'Liberty Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
    cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
    keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})]
=== added file 'recipes/mayra.recipe' (51 lines)
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mayra.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Mayra(BasicNewsRecipe):
    title = u'Mayra'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Traieste urban, cool, sexy'
    publisher = 'Mayra'
    category = 'Ziare,Stiri,Reviste'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://img.konkurs.ro/img/concursuri-cu-premii/147/14672_front.jpg'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [
                       dict(name='div', attrs={'id':'article_details'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'id':'LikePluginPagelet'})
                  , dict(name='p', attrs={'id':'tags'})
                  , dict(name='span', attrs={'id':'tweet-button'})
                  ]

    remove_tags_after = [
                          dict(name='div', attrs={'id':'LikePluginPagelet'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://www.mayra.ro/rss') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -1,15 +1,18 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'

# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False

# Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True

'''
Change Log:
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
@@ -32,41 +35,43 @@ import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation

class MPHKRecipe(BasicNewsRecipe):
    title = 'Ming Pao - Hong Kong'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Eddie Lau'
    description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
    publisher = 'MingPao'
    category = 'Chinese, News, Hong Kong'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'zh'
    encoding = 'Big5-HKSCS'
    recursions = 0
    conversion_options = {'linearize_tables':True}
    timefmt = ''
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
    keep_only_tags = [dict(name='h1'),
                      dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                      dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                      dict(attrs={'id':['newscontent']}), # entertainment and column page content
                      dict(attrs={'id':['newscontent01','newscontent02']}),
                      dict(attrs={'class':['photo']}),
                      dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                     ]
    remove_tags = [dict(name='style'),
                   dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
                   dict(name='table')] # for content fetched from life.mingpao.com
    remove_attributes = ['width']
    preprocess_regexps = [
                          (re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
                           lambda match: '<h1>'),
                          (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
@@ -80,10 +85,10 @@ class MPHKRecipe(BasicNewsRecipe):
                           lambda match: "</b>")
                         ]

    def image_url_processor(cls, baseurl, url):
        # trick: break the url at the first occurrence of a digit, add an additional
        # '_' at the front
        # not working, may need to move this to preprocess_html() method
        # minIdx = 10000
        # i0 = url.find('0')
        # if i0 >= 0 and i0 < minIdx:
@@ -115,314 +120,357 @@ class MPHKRecipe(BasicNewsRecipe):
        # i9 = url.find('9')
        # if i9 >= 0 and i9 < minIdx:
        #     minIdx = i9
        return url

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        # dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        # dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except:
            cover = None
        return cover

    def parse_index(self):
        feeds = []
        dateStr = self.get_fetchdate()

        if __UseLife__:
            for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
                                       (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
                                       (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
                                       (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalmr', 'nal'),
                                       (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalfa', 'nal'),
                                       (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalca', 'nal'),
                                       (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalta', 'nal'),
                                       (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                                       (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                                       (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
                                       (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=ncolumn', 'ncl')]:
                articles = self.parse_section2(url, keystr)
                if articles:
                    feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))
        else:
            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special- editorial
            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalmr')
            if ed_articles:
                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                               (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - finance
            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            if fin_articles:
                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - entertainment
            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            if ent_articles:
                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special- columns
            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=ncolumn')
            if col_articles:
                feeds.append((u'\u5c08\u6b04 Columns', col_articles))

        return feeds

    # parse from news.mingpao.com
    def parse_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
        current_articles = []
        included_urls = []
        divs.reverse()
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' + url
            if url not in included_urls and url.rfind('Redirect') == -1:
                current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles
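All of the parse_* helpers here share one idiom: walk the links in reverse, de-duplicate by URL, then reverse again. As I read it, reversing first means that when a URL is listed twice the later occurrence wins, and the final reverse restores on-page reading order. The idiom isolated (the function and its names are mine, not the recipe's):

# The reverse / de-duplicate / reverse idiom shared by the parse_* helpers.
def unique_in_page_order(links):
    # links: list of (title, url) pairs in on-page order
    seen = []
    articles = []
    for title, url in reversed(links):
        if url not in seen and url.rfind('Redirect') == -1:
            articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
            seen.append(url)
    articles.reverse()
    return articles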

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_ed_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_fin_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href= True)
        current_articles = []
        included_urls = []
        for i in a:
            #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
            if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
                title = self.tag_to_string(i)
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles

    def parse_ent_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_col_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(style=True):
            del item['width']
        for item in soup.findAll(stype=True):
            del item['absmiddle']
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if __MakePeriodical__ == False:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)

            manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
            manifest.append(os.path.join(dir, 'index.html'))
            manifest.append(os.path.join(dir, 'index.ncx'))

            # Get cover
            cpath = getattr(self, 'cover_path', None)
            if cpath is None:
                pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
                if self.default_cover(pf):
                    cpath = pf.name
            if cpath is not None and os.access(cpath, os.R_OK):
                opf.cover = cpath
                manifest.append(cpath)

            # Get masthead
            mpath = getattr(self, 'masthead_path', None)
            if mpath is not None and os.access(mpath, os.R_OK):
                manifest.append(mpath)

            opf.create_manifest_from_files_in(manifest)
            for mani in opf.manifest:
                if mani.path.endswith('.ncx'):
                    mani.id = 'ncx'
                if mani.path.endswith('mastheadImage.jpg'):
                    mani.id = 'masthead-image'
            entries = ['index.html']
            toc = TOC(base_path=dir)
            self.play_order_counter = 0
            self.play_order_map = {}

            def feed_index(num, parent):
                f = feeds[num]
                for j, a in enumerate(f):
                    if getattr(a, 'downloaded', False):
                        adir = 'feed_%d/article_%d/'%(num, j)
                        auth = a.author
                        if not auth:
                            auth = None
                        desc = a.text_summary
                        if not desc:
                            desc = None
                        else:
                            desc = self.description_limiter(desc)
                        entries.append('%sindex.html'%adir)
                        po = self.play_order_map.get(entries[-1], None)
                        if po is None:
                            self.play_order_counter += 1
                            po = self.play_order_counter
                        parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                        play_order=po, author=auth, description=desc)
                        last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                        for sp in a.sub_pages:
                            prefix = os.path.commonprefix([opf_path, sp])
                            relp = sp[len(prefix):]
                            entries.append(relp.replace(os.sep, '/'))
                            last = sp

                        if os.path.exists(last):
                            with open(last, 'rb') as fi:
                                src = fi.read().decode('utf-8')
                            soup = BeautifulSoup(src)
                            body = soup.find('body')
                            if body is not None:
                                prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                                templ = self.navbar.generate(True, num, j, len(f),
                                                             not self.has_single_feed,
                                                             a.orig_url, self.publisher, prefix=prefix,
                                                             center=self.center_navbar)
                                elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                                body.insert(len(body.contents), elem)
                                with open(last, 'wb') as fi:
                                    fi.write(unicode(soup).encode('utf-8'))
            if len(feeds) == 0:
                raise Exception('All feeds are empty, aborting.')

            if len(feeds) > 1:
                for i, f in enumerate(feeds):
                    entries.append('feed_%d/index.html'%i)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    auth = getattr(f, 'author', None)
                    if not auth:
                        auth = None
                    desc = getattr(f, 'description', None)
                    if not desc:
                        desc = None
                    feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                                               f.title, play_order=po, description=desc, author=auth))
            else:
                entries.append('feed_%d/index.html'%0)
                feed_index(0, toc)

            for i, p in enumerate(entries):
                entries[i] = os.path.join(dir, p.replace('/', os.sep))
            opf.create_spine(entries)
            opf.set_toc(toc)

            with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
                opf.render(opf_file, ncx_file)

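The overridden create_opf above is long, but its skeleton is the standard calibre flow: build MetaInformation, hand it to OPFCreator, register files in the manifest, build a TOC, then render index.opf and index.ncx together. Reduced to that skeleton (a sketch of the structure only, omitting the masthead, cover and navbar handling shown above):

# Skeleton of the create_opf flow above; error handling and the masthead,
# cover and navbar work are omitted.
def create_opf_skeleton(self, feeds, dir):
    mi = MetaInformation(u'\u660e\u5831 (\u9999\u6e2f)', [self.publisher])
    mi.publication_type = 'periodical:' + self.publication_type + ':' + self.short_title()
    mi.timestamp = mi.pubdate = self.get_dtlocal()

    opf = OPFCreator(dir, mi)
    manifest = [os.path.join(dir, 'feed_%d' % i) for i in range(len(feeds))]
    manifest.append(os.path.join(dir, 'index.html'))
    manifest.append(os.path.join(dir, 'index.ncx'))
    opf.create_manifest_from_files_in(manifest)

    toc = TOC(base_path=dir)
    entries = ['index.html'] + ['feed_%d/index.html' % i for i in range(len(feeds))]
    opf.create_spine([os.path.join(dir, p.replace('/', os.sep)) for p in entries])
    opf.set_toc(toc)

    with nested(open(os.path.join(dir, 'index.opf'), 'wb'),
                open(os.path.join(dir, 'index.ncx'), 'wb')) as (opf_file, ncx_file):
        opf.render(opf_file, ncx_file)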
=== added file 'recipes/moldovaazi.recipe' (50 lines)
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
azi.md
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MoldovaAzi(BasicNewsRecipe):
    title = u'Moldova Azi'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Moldova pe internet'
    publisher = 'Moldova Azi'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.azi.md/images/logo.gif'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [ dict(name='div', attrs={'id':'in'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'class':'in-more-stories'})
                  ]

    remove_tags_after = [
                          dict(name='div', attrs={'id':'comment_wrapper'})
                        , dict(name='div', attrs={'class':'box-title4'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://www.azi.md/ro/feeds/0/rss201') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
=== added file 'recipes/newsmoldova.recipe' (50 lines)
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
newsmoldova.md
'''

from calibre.web.feeds.news import BasicNewsRecipe

class NewsMoldova(BasicNewsRecipe):
    title = u'Agen\u0163ia de \u015ftiri Moldova'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Agen\u0163ia de \u015ftiri Moldova'
    publisher = 'Moldova'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.newsmoldova.md/i/logo_top_md.gif'

    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [ dict(name='div', attrs={'class':'main-article-index article'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'id':'actions'})
                  , dict(name='li', attrs={'class':'invisible'})
                  ]

    remove_tags_after = [
                          dict(name='div', attrs={'id':'actions'})
                        ]

    feeds = [ (u'\u0218tiri', u'http://newsmoldova.md/export/rss2/archive/index.xml') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@@ -3,7 +3,6 @@ __license__ = 'GPL v3'
'''
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed


class ReadersDigest(BasicNewsRecipe):
@@ -38,151 +37,20 @@ class ReadersDigest(BasicNewsRecipe):
    '''

    remove_tags = [
        dict(name='h4', attrs={'class':'close'}),
        dict(name='div', attrs={'class':'fromLine'}),
        dict(name='img', attrs={'class':'colorTag'}),
        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
        dict(name='div', attrs={'class':'horizontalAd'}),
        dict(name='div', attrs={'id':'imageCounterLeft'}),
        dict(name='div', attrs={'id':'commentsPrint'})
    ]

    feeds = [
        ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
        ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
        ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
        ('Blogs', 'http://feeds.rd.com/ReadersDigestBlogs'),
        ('Food', 'http://www.rd.com/food/feed'),
        ('Health', 'http://www.rd.com/health/feed'),
        ('Home', 'http://www.rd.com/home/feed'),
        ('Family', 'http://www.rd.com/family/feed'),
        ('Money', 'http://www.rd.com/money/feed'),
        ('Travel', 'http://www.rd.com/travel/feed'),
    ]

    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'

    #-------------------------------------------------------------------------------------------------

    def print_version(self, url):

        # Get the identity number of the current article and append it to the root print URL

        if url.find('/article') > 0:
            ident = url[url.find('/article')+8:url.find('.html?')-4]
            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident

        elif url.find('/post') > 0:

            # in this case, have to get the page itself to derive the Print page.
            soup = self.index_to_soup(url)
            newsoup = soup.find('ul',attrs={'class':'printBlock'})
            url = 'http://www.rd.com' + newsoup('a')[0]['href']
            url = url[0:url.find('&Keep')]

        return url
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def parse_index(self):
|
||||
|
||||
pages = [
|
||||
('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
|
||||
# useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
|
||||
('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
|
||||
|
||||
keep_only_tags = dict(id='main-content')
|
||||
remove_tags = [
|
||||
{'class':['post-categories']},
|
||||
]
|
||||
|
||||
feeds = []
|
||||
|
||||
for page in pages:
|
||||
section, url, divider, attrList = page
|
||||
newArticles = self.page_parse(url, divider, attrList)
|
||||
feeds.append((section,newArticles))
|
||||
|
||||
# after the pages of the site have been processed, parse several RSS feeds for additional sections
|
||||
newfeeds = Feed()
|
||||
newfeeds = self.parse_rss()
|
||||
|
||||
|
||||
# The utility code in parse_rss returns a Feed object. Convert each feed/article combination into a form suitable
|
||||
# for this module (parse_index).
|
||||
|
||||
for feed in newfeeds:
|
||||
newArticles = []
|
||||
for article in feed.articles:
|
||||
newArt = {
|
||||
'title' : article.title,
|
||||
'url' : article.url,
|
||||
'date' : article.date,
|
||||
'description' : article.text_summary
|
||||
}
|
||||
newArticles.append(newArt)
|
||||
|
||||
|
||||
# New and Blogs should be the first two feeds.
|
||||
if feed.title == 'New in RD':
|
||||
feeds.insert(0,(feed.title,newArticles))
|
||||
elif feed.title == 'Blogs':
|
||||
feeds.insert(1,(feed.title,newArticles))
|
||||
else:
|
||||
feeds.append((feed.title,newArticles))
|
||||
|
||||
|
||||
return feeds
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def page_parse(self, mainurl, divider, attrList):
|
||||
|
||||
articles = []
|
||||
mainsoup = self.index_to_soup(mainurl)
|
||||
for item in mainsoup.findAll(attrs=attrList):
|
||||
newArticle = {
|
||||
'title' : item('img')[0]['alt'],
|
||||
'url' : 'http://www.rd.com'+item('a')[0]['href'],
|
||||
'date' : '',
|
||||
'description' : ''
|
||||
}
|
||||
articles.append(newArticle)
|
||||
|
||||
|
||||
|
||||
return articles
|
||||
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def parse_rss (self):
|
||||
|
||||
# Do the "official" parse_feeds first
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
|
||||
|
||||
# Loop thru the articles in all feeds to find articles with "recipe" in it
|
||||
recipeArticles = []
|
||||
for curfeed in feeds:
|
||||
delList = []
|
||||
for a,curarticle in enumerate(curfeed.articles):
|
||||
if curarticle.title.upper().find('RECIPE') >= 0:
|
||||
recipeArticles.append(curarticle)
|
||||
delList.append(curarticle)
|
||||
if len(delList)>0:
|
||||
for d in delList:
|
||||
index = curfeed.articles.index(d)
|
||||
curfeed.articles[index:index+1] = []
|
||||
|
||||
# If there are any recipes found, create a new Feed object and append.
|
||||
if len(recipeArticles) > 0:
|
||||
pfeed = Feed()
|
||||
pfeed.title = 'Recipes'
|
||||
pfeed.descrition = 'Recipe Feed (Virtual)'
|
||||
pfeed.image_url = None
|
||||
pfeed.oldest_article = 30
|
||||
pfeed.id_counter = len(recipeArticles)
|
||||
# Create a new Feed, add the recipe articles, and then append
|
||||
# to "official" list of feeds
|
||||
pfeed.articles = recipeArticles[:]
|
||||
feeds.append(pfeed)
|
||||
|
||||
return feeds
|
||||
|
||||
|
recipes/replicavedetelor.recipe (new file)
@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, '
'''
replicavedetelor.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ReplicaVedetelor(BasicNewsRecipe):
    title = u'Replica Vedetelor'
    __author__ = u'Silviu Cotoara'
    description = u'Ofer\u0103 vedetelor dreptul la replic\u0103'
    publisher = 'Replica Vedetelor'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Vedete'
    encoding = 'utf-8'
    cover_url = 'http://www.webart-software.eu/_pics/lucrari_referinta/medium/84/1-Replica-Vedetelor.jpg'

    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id':'zona-continut'})
    ]

    remove_tags = [
        dict(name='ul', attrs={'id':['lista-imagini']})
        , dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
    ]

    remove_tags_after = [
        dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
    ]

    feeds = [
        (u'Feeds', u'http://www.replicavedetelor.ro/feed')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class RzeczpospolitaRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    __author__ = u'kwetal and Tomasz Dlugosz'
    language = 'pl'
    version = 1

@ -38,6 +38,8 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}

@ -48,6 +50,13 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
        .fot{font-size: x-small; color: #666666;}
        '''

    def skip_ad_pages(self, soup):
        if ('advertisement' in soup.find('title').string.lower()):
            href = soup.find('a').get('href')
            return self.index_to_soup(href, raw=True)
        else:
            return None

    def print_version(self, url):
        start, sep, rest = url.rpartition('/')
        forget, sep, index = rest.rpartition(',')
@ -33,7 +33,7 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
            elif c.name.endswith('_password'):
                br[c.name] = self.password
        raw = br.submit().read()
        if '>Logout' not in raw:
        if 'You have been logged in' not in raw:
            raise ValueError('Failed to login, check your username and password')
        return br
@ -1,17 +1,12 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'


import re
from calibre.web.feeds.news import BasicNewsRecipe

class TelepolisNews(BasicNewsRecipe):
    title = u'Telepolis (News+Artikel)'
    __author__ = 'Gerhard Aigner'
    __author__ = 'syntaxis'
    publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
    description = 'News from telepolis'
    description = 'News from Telepolis'
    category = 'news'
    oldest_article = 7
    max_articles_per_feed = 100

@ -20,14 +15,19 @@ class TelepolisNews(BasicNewsRecipe):
    encoding = "utf-8"
    language = 'de'

    use_embedded_content = False

    remove_empty_feeds = True

    preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
                          (re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]

    keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
    remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]

    keep_only_tags = [dict(name = 'div',attrs={'class':'head'}),dict(name = 'div',attrs={'class':'leftbox'}),dict(name='td',attrs={'class':'strict'})]
    remove_tags = [ dict(name='td',attrs={'class':'blogbottom'}),
        dict(name='div',attrs={'class':'forum'}), dict(name='div',attrs={'class':'social'}),dict(name='div',attrs={'class':'blog-letter p-news'}),
        dict(name='div',attrs={'class':'blog-sub'}),dict(name='div',attrs={'class':'version-div'}),dict(name='div',attrs={'id':'breadcrumb'})
        ,dict(attrs={'class':'tp-url'}),dict(attrs={'class':'blog-name entry_'}) ]

    remove_tags_after = [dict(name='span', attrs={'class':['breadcrumb']})]

    feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]

@ -39,15 +39,8 @@ class TelepolisNews(BasicNewsRecipe):

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    def get_article_url(self, article):
        '''if the linked article is of kind artikel don't take it'''
        if (article.link.count('artikel') > 1) :
            return None
        return article.link

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0,mtag)
        return soup
@ -10,6 +10,8 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe

class Time(BasicNewsRecipe):
    recipe_disabled = ('This recipe has been disabled as TIME no longer'
            ' publishes complete articles on the web.')
    title = u'Time'
    __author__ = 'Kovid Goyal and Sujata Raman'
    description = 'Weekly magazine'
recipes/united_daily.recipe (new file)
@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class UnitedDaily(BasicNewsRecipe):
    title = u'聯合新聞網'
    oldest_article = 1
    max_articles_per_feed = 100

    feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
             (u'政治', u'http://udn.com/udnrss/politics.xml'),
             (u'社會', u'http://udn.com/udnrss/social.xml'),
             (u'生活', u'http://udn.com/udnrss/life.xml'),
             (u'綜合', u'http://udn.com/udnrss/education.xml'),
             (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
             (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
             (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
             (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
             (u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
             (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
             (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
             (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
             (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
             (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
             (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
             (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
             (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
             (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
             (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
             (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
             (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
             (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
             (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
             (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
             (u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
             (u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
             (u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
             (u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
             (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
             (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
             (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
             (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
             (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
             (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
             (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
             (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
             (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
             ]

    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''

    __author__ = 'Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
    remove_tags = [dict(name='div', attrs={'id':['mvouter']})]
recipes/welt_der_physik.recipe (new file)
@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):

    title = u'Welt der Physik'
    __author__ = 'schuster'
    remove_tags_before = [dict(name='div', attrs={'class':'inhalt_bild_text_printonly'})]
    remove_tags_after = [dict(name='span', attrs={'class':'clearinhalt_bild'})]
    remove_tags = [dict(attrs={'class':['invisible', 'searchfld', 'searchbtn', 'topnavi', 'topsearch']}),
                   dict(id=['naservice', 'phservicemenu', '',]),
                   dict(name=['naservice'])]
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    feeds = [(u'Nachrichten und Neuigkeiten', u'http://www.weltderphysik.de/rss/alles.xml')]
recipes/ziuaveche.recipe (new file)
@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ziuaveche.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ZiuaVeche(BasicNewsRecipe):
    title = u'Ziua Veche'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Cotidian online'
    publisher = 'Ziua Veche'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Cotidiane,Stiri'
    encoding = 'utf-8'
    cover_url = 'http://www.ziuaveche.ro/wp-content/themes/tema/images/zv-logo-alb-old.png'

    conversion_options = {
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id':'singlePost'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'LikePluginPagelet'})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'id':'LikePluginPagelet'})
    ]

    feeds = [
        (u'Feeds', u'http://www.ziuaveche.ro/feed/rss')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
@ -11,7 +11,7 @@ __all__ = [
        'build', 'build_pdf2xml', 'server',
        'gui',
        'develop', 'install',
        'resources',
        'kakasi', 'resources',
        'check',
        'sdist',
        'manual', 'tag_release',

@ -49,8 +49,9 @@ gui = GUI()
from setup.check import Check
check = Check()

from setup.resources import Resources
from setup.resources import Resources, Kakasi
resources = Resources()
kakasi = Kakasi()

from setup.publish import Manual, TagRelease, Stage1, Stage2, \
        Stage3, Stage4, Publish
@ -30,11 +30,12 @@ int report_libc_error(const char *msg) {
}

int pyobject_to_int(PyObject *res) {
    int ret; PyObject *tmp;
    tmp = PyNumber_Int(res);
    if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
    else ret = (int)PyInt_AS_LONG(tmp);

    int ret = 0; PyObject *tmp;
    if (res != NULL) {
        tmp = PyNumber_Int(res);
        if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
        else ret = (int)PyInt_AS_LONG(tmp);
    }
    return ret;
}
@ -32,6 +32,7 @@ class Win32(VMInstaller):
    FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
    INSTALLER_EXT = 'msi'
    SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
    BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD

    def download_installer(self):
        installer = self.installer()
@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn

OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.7.2'
QT_DIR = 'Q:\\Qt\\4.7.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
@ -23,6 +23,9 @@ wWinMain(HINSTANCE Inst, HINSTANCE PrevInst,
    ret = execute_python_entrypoint(BASENAME, MODULE, FUNCTION,
            stdout_redirect, stderr_redirect);

    if (stdout != NULL) fclose(stdout);
    if (stderr != NULL) fclose(stderr);

    DeleteFile(stdout_redirect);
    DeleteFile(stderr_redirect);
@ -11,9 +11,6 @@
        SummaryCodepage='1252' />

    <Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
    <!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
         after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
    <Property Id='REINSTALLMODE' Value='emus'/>

    <Upgrade Id="{upgrade_code}">
        <UpgradeVersion Maximum="{version}"
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
from zlib import compress

from setup import Command, basenames, __appname__
from setup import Command, basenames, __appname__, iswindows

def get_opts_from_parser(parser):
    def do_opt(opt):

@ -23,13 +23,119 @@ def get_opts_from_parser(parser):
        for o in g.option_list:
            for x in do_opt(o): yield x

class Resources(Command):
class Kakasi(Command):

    description = 'Compile various needed calibre resources'
    description = 'Compile resources for unihandecode'

    KAKASI_PATH = os.path.join(Command.SRC, __appname__,
            'ebooks', 'unihandecode', 'pykakasi')

    def run(self, opts):
        self.records = {}
        src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                'pykakasi','kanwadict2.db')
        base = os.path.dirname(dest)
        if not os.path.exists(base):
            os.makedirs(base)

        if self.newer(dest, src) or iswindows:
            self.info('\tGenerating Kanwadict')

            for line in open(src, "r"):
                self.parsekdict(line)
            self.kanwaout(dest)

        src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                'pykakasi','itaijidict2.pickle')

        if self.newer(dest, src) or iswindows:
            self.info('\tGenerating Itaijidict')
            self.mkitaiji(src, dest)

        src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                'pykakasi','kanadict2.pickle')

        if self.newer(dest, src) or iswindows:
            self.info('\tGenerating kanadict')
            self.mkkanadict(src, dest)

        return

    def mkitaiji(self, src, dst):
        dic = {}
        for line in open(src, "r"):
            line = line.decode("utf-8").strip()
            if line.startswith(';;'): # skip comment
                continue
            if re.match(r"^$",line):
                continue
            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
            dic[pair[0]] = pair[1]
        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle

    def mkkanadict(self, src, dst):
        dic = {}
        for line in open(src, "r"):
            line = line.decode("utf-8").strip()
            if line.startswith(';;'): # skip comment
                continue
            if re.match(r"^$",line):
                continue
            (alpha, kana) = line.split(' ')
            dic[kana] = alpha
        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle

    def parsekdict(self, line):
        line = line.decode("utf-8").strip()
        if line.startswith(';;'): # skip comment
            return
        (yomi, kanji) = line.split(' ')
        if ord(yomi[-1:]) <= ord('z'):
            tail = yomi[-1:]
            yomi = yomi[:-1]
        else:
            tail = ''
        self.updaterec(kanji, yomi, tail)

    def updaterec(self, kanji, yomi, tail):
        key = "%04x"%ord(kanji[0])
        if key in self.records:
            if kanji in self.records[key]:
                rec = self.records[key][kanji]
                rec.append((yomi,tail))
                self.records[key].update( {kanji: rec} )
            else:
                self.records[key][kanji]=[(yomi, tail)]
        else:
            self.records[key] = {}
            self.records[key][kanji]=[(yomi, tail)]
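    # Note: self.records buckets entries by "%04x" % ord(first kanji char);
    # each bucket maps kanji -> [(yomi, tail), ...] and is what kanwaout()
    # below serializes with marshal+zlib into the anydbm database.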

    def kanwaout(self, out):
        try:
            # Needed as otherwise anydbm tries to create a gdbm db when the db
            # created on Unix is found
            os.remove(out)
        except:
            pass
        dic = anydbm.open(out, 'n')
        for (k, v) in self.records.iteritems():
            dic[k] = compress(marshal.dumps(v))
        dic.close()

    def clean(self):
        kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
        if os.path.exists(kakasi):
            shutil.rmtree(kakasi)

class Resources(Command):

    description = 'Compile various needed calibre resources'
    sub_commands = ['kakasi']

    def run(self, opts):
        scripts = {}
        for x in ('console', 'gui'):

@ -117,108 +223,13 @@ class Resources(Command):
        import json
        json.dump(function_dict, open(dest, 'wb'), indent=4)

        self.run_kakasi(opts)

    def run_kakasi(self, opts):
        self.records = {}
        src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                'pykakasi','kanwadict2.db')
        base = os.path.dirname(dest)
        if not os.path.exists(base):
            os.makedirs(base)

        if self.newer(dest, src):
            self.info('\tGenerating Kanwadict')

            for line in open(src, "r"):
                self.parsekdict(line)
            self.kanwaout(dest)

        src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                'pykakasi','itaijidict2.pickle')

        if self.newer(dest, src):
            self.info('\tGenerating Itaijidict')
            self.mkitaiji(src, dest)

        src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
        dest = self.j(self.RESOURCES, 'localization',
                'pykakasi','kanadict2.pickle')

        if self.newer(dest, src):
            self.info('\tGenerating kanadict')
            self.mkkanadict(src, dest)

        return

    def mkitaiji(self, src, dst):
        dic = {}
        for line in open(src, "r"):
            line = line.decode("utf-8").strip()
            if line.startswith(';;'): # skip comment
                continue
            if re.match(r"^$",line):
                continue
            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
            dic[pair[0]] = pair[1]
        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle

    def mkkanadict(self, src, dst):
        dic = {}
        for line in open(src, "r"):
            line = line.decode("utf-8").strip()
            if line.startswith(';;'): # skip comment
                continue
            if re.match(r"^$",line):
                continue
            (alpha, kana) = line.split(' ')
            dic[kana] = alpha
        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle

    def parsekdict(self, line):
        line = line.decode("utf-8").strip()
        if line.startswith(';;'): # skip comment
            return
        (yomi, kanji) = line.split(' ')
        if ord(yomi[-1:]) <= ord('z'):
            tail = yomi[-1:]
            yomi = yomi[:-1]
        else:
            tail = ''
        self.updaterec(kanji, yomi, tail)

    def updaterec(self, kanji, yomi, tail):
        key = "%04x"%ord(kanji[0])
        if key in self.records:
            if kanji in self.records[key]:
                rec = self.records[key][kanji]
                rec.append((yomi,tail))
                self.records[key].update( {kanji: rec} )
            else:
                self.records[key][kanji]=[(yomi, tail)]
        else:
            self.records[key] = {}
            self.records[key][kanji]=[(yomi, tail)]

    def kanwaout(self, out):
        dic = anydbm.open(out, 'c')
        for (k, v) in self.records.iteritems():
            dic[k] = compress(marshal.dumps(v))
        dic.close()

    def clean(self):
        for x in ('scripts', 'recipes', 'ebook-convert-complete'):
            x = self.j(self.RESOURCES, x+'.pickle')
            if os.path.exists(x):
                os.remove(x)
        kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
        if os.path.exists(kakasi):
            shutil.rmtree(kakasi)

        from setup.commands import kakasi
        kakasi.clean()
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 7, 59)
numeric_version = (0, 8, 1)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@ -449,7 +449,7 @@ class CatalogPlugin(Plugin): # {{{
            ['author_sort','authors','comments','cover','formats',
             'id','isbn','ondevice','pubdate','publisher','rating',
             'series_index','series','size','tags','timestamp',
             'title','uuid'])
             'title_sort','title','uuid'])
        all_custom_fields = set(db.custom_field_keys())
        all_fields = all_std_fields.union(all_custom_fields)
@ -607,7 +607,7 @@ class StoreBase(Plugin): # {{{
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'John Schember'
    type = _('Store')
    minimum_calibre_version = (0, 7, 59)
    minimum_calibre_version = (0, 8, 0)

    actual_plugin = None

@ -628,8 +628,9 @@ from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban

plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive]
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]

# }}}

@ -1096,6 +1097,11 @@ class StoreAmazonKindleStore(StoreBase):
    description = _('Kindle books from Amazon')
    actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'

class StoreAmazonDEKindleStore(StoreBase):
    name = 'Amazon DE Kindle'
    description = _('Kindle eBooks')
    actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'

class StoreAmazonUKKindleStore(StoreBase):
    name = 'Amazon UK Kindle'
    description = _('Kindle books from Amazon.uk')

@ -1111,6 +1117,11 @@ class StoreBNStore(StoreBase):
    description = _('Books, Textbooks, eBooks, Toys, Games and More.')
    actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'

class StoreBeamEBooksDEStore(StoreBase):
    name = 'Beam EBooks DE'
    description = _('der eBook Shop')
    actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'

class StoreBeWriteStore(StoreBase):
    name = 'BeWrite Books'
    description = _('Publishers of fine books.')

@ -1126,7 +1137,12 @@ class StoreEbookscomStore(StoreBase):
    description = _('The digital bookstore.')
    actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'

class StoreEHarlequinStoretore(StoreBase):
class StoreEPubBuyDEStore(StoreBase):
    name = 'EPUBBuy DE'
    description = _('EPUBReaders eBook Shop')
    actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'

class StoreEHarlequinStore(StoreBase):
    name = 'eHarlequin'
    description = _('entertain, enrich, inspire.')
    actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'

@ -1136,6 +1152,11 @@ class StoreFeedbooksStore(StoreBase):
    description = _('Read anywhere.')
    actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'

class StoreFoylesUKStore(StoreBase):
    name = 'Foyles UK'
    description = _('Foyles of London, online')
    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'

class StoreGutenbergStore(StoreBase):
    name = 'Project Gutenberg'
    description = _('The first producer of free ebooks.')

@ -1171,22 +1192,23 @@ class StoreWaterstonesUKStore(StoreBase):
    description = _('Feel every word')
    actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'

class StoreFoylesUKStore(StoreBase):
    name = 'Foyles UK'
    description = _('Foyles of London, online')
    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
class StoreWeightlessBooksStore(StoreBase):
    name = 'Weightless Books'
    description = '(e)Books That Don\'t Weigh You Down'
    actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'

class AmazonDEKindleStore(StoreBase):
    name = 'Amazon DE Kindle'
    description = _('Kindle eBooks')
    actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
class StoreWizardsTowerBooksStore(StoreBase):
    name = 'Wizards Tower Books'
    description = 'Wizard\'s Tower Press'
    actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'

plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
plugins += [StoreAmazonKindleStore, StoreAmazonDEKindleStore, StoreAmazonUKKindleStore,
        StoreBaenWebScriptionStore, StoreBNStore,
        StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
        StoreEHarlequinStoretore, StoreFeedbooksStore,
        StoreBeamEBooksDEStore, StoreBeWriteStore,
        StoreDieselEbooksStore, StoreEbookscomStore, StoreEPubBuyDEStore,
        StoreEHarlequinStore, StoreFeedbooksStore,
        StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
        StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
        StoreWaterstonesUKStore]
        StoreWaterstonesUKStore, StoreWeightlessBooksStore, StoreWizardsTowerBooksStore]

# }}}
@ -253,7 +253,7 @@ class OutputProfile(Plugin):
    periodical_date_in_title = True

    #: Characters used in jackets and catalogs
    missing_char = u'x'
    missing_char = u'x'
    ratings_char = u'*'
    empty_ratings_char = u' '
    read_char = u'+'

@ -293,38 +293,38 @@ class iPadOutput(OutputProfile):
            }
        ]

    missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
    ratings_char = u'\u2605' # filled star
    empty_ratings_char = u'\u2606' # hollow star
    read_char = u'\u2713' # check mark
    missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
    ratings_char = u'\u2605' # filled star
    empty_ratings_char = u'\u2606' # hollow star
    read_char = u'\u2713' # check mark

    touchscreen = True
    # touchscreen_news_css {{{
    touchscreen_news_css = u'''
        /* hr used in articles */
        .article_articles_list {
        /* hr used in articles */
        .article_articles_list {
            width:18%;
        }
        }
        .article_link {
            color: #593f29;
            color: #593f29;
            font-style: italic;
        }
        .article_next {
            -webkit-border-top-right-radius:4px;
            -webkit-border-bottom-right-radius:4px;
            -webkit-border-top-right-radius:4px;
            -webkit-border-bottom-right-radius:4px;
            font-style: italic;
            width:32%;
        }

        .article_prev {
            -webkit-border-top-left-radius:4px;
            -webkit-border-bottom-left-radius:4px;
            -webkit-border-top-left-radius:4px;
            -webkit-border-bottom-left-radius:4px;
            font-style: italic;
            width:32%;
        }
        .article_sections_list {
        .article_sections_list {
            width:18%;
        }
        }
        .articles_link {
            font-weight: bold;
        }

@ -334,8 +334,8 @@ class iPadOutput(OutputProfile):

        .caption_divider {
            border:#ccc 1px solid;
        }
            border:#ccc 1px solid;
        }

        .touchscreen_navbar {
            background:#c3bab2;

@ -357,50 +357,50 @@ class iPadOutput(OutputProfile):
            text-align:center;
        }

        .touchscreen_navbar td a:link {
            color: #593f29;
            text-decoration: none;
        }
        .touchscreen_navbar td a:link {
            color: #593f29;
            text-decoration: none;
        }

        /* Index formatting */
        .publish_date {
            text-align:center;
        }
        .divider {
            border-bottom:1em solid white;
            border-top:1px solid gray;
        }
        /* Index formatting */
        .publish_date {
            text-align:center;
        }
        .divider {
            border-bottom:1em solid white;
            border-top:1px solid gray;
        }

        hr.caption_divider {
            border-color:black;
            border-style:solid;
            border-width:1px;
        }
        hr.caption_divider {
            border-color:black;
            border-style:solid;
            border-width:1px;
        }

        /* Feed summary formatting */
        .article_summary {
            display:inline-block;
        }
            display:inline-block;
        }
        .feed {
            font-family:sans-serif;
            font-weight:bold;
            font-size:larger;
        }
        }

        .feed_link {
            font-style: italic;
        }

        .feed_next {
            -webkit-border-top-right-radius:4px;
            -webkit-border-bottom-right-radius:4px;
            -webkit-border-top-right-radius:4px;
            -webkit-border-bottom-right-radius:4px;
            font-style: italic;
            width:40%;
        }

        .feed_prev {
            -webkit-border-top-left-radius:4px;
            -webkit-border-bottom-left-radius:4px;
            -webkit-border-top-left-radius:4px;
            -webkit-border-bottom-left-radius:4px;
            font-style: italic;
            width:40%;
        }

@ -410,24 +410,24 @@ class iPadOutput(OutputProfile):
            font-size: 160%;
        }

        .feed_up {
        .feed_up {
            font-weight: bold;
            width:20%;
        }
        }

        .summary_headline {
            font-weight:bold;
            text-align:left;
        }
        }

        .summary_byline {
            text-align:left;
            font-family:monospace;
        }
        }

        .summary_text {
            text-align:left;
        }
        }

        '''
    # }}}

@ -617,8 +617,8 @@ class KindleOutput(OutputProfile):
    supports_mobi_indexing = True
    periodical_date_in_title = False

    missing_char = u'x\u2009'
    empty_ratings_char = u'\u2606'
    missing_char = u'x\u2009'
    empty_ratings_char = u'\u2606'
    ratings_char = u'\u2605'
    read_char = u'\u2713'

@ -642,8 +642,8 @@ class KindleDXOutput(OutputProfile):
    #comic_screen_size = (741, 1022)
    supports_mobi_indexing = True
    periodical_date_in_title = False
    missing_char = u'x\u2009'
    empty_ratings_char = u'\u2606'
    missing_char = u'x\u2009'
    empty_ratings_char = u'\u2606'
    ratings_char = u'\u2605'
    read_char = u'\u2713'
    mobi_ems_per_blockquote = 2.0
@ -92,8 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
    config['enabled_plugins'] = ep

default_disabled_plugins = set([
    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
    'Kent District Library'
    'Overdrive', 'Douban Books',
])

def is_disabled(plugin):
@ -109,7 +109,7 @@ class ANDROID(USBMS):
            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']
@ -203,9 +203,11 @@ class ITUNES(DriverBase):
    # 0x1294   iPhone 3GS
    # 0x1297   iPhone 4
    # 0x129a   iPad
    # 0x12a2   iPad2
    # 0x129f   iPad2 (WiFi)
    # 0x12a2   iPad2 (GSM)
    # 0x12a3   iPad2 (CDMA)
    VENDOR_ID = [0x05ac]
    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x12a2]
    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a2,0x12a3]
    BCD = [0x01]

    # Plugboard ID

@ -506,7 +508,7 @@ class ITUNES(DriverBase):
        if self.iTunes:
            # Check for connected book-capable device
            self.sources = self._get_sources()
            if 'iPod' in self.sources:
            if 'iPod' in self.sources and not self.ejected:
                #if DEBUG:
                    #sys.stdout.write('.')
                    #sys.stdout.flush()

@ -2036,16 +2038,17 @@ class ITUNES(DriverBase):
        if 'iPod' in self.sources:
            connected_device = self.sources['iPod']
            device = self.iTunes.sources[connected_device]
            dev_books = None
            for pl in device.playlists():
                if pl.special_kind() == appscript.k.Books:
                    if DEBUG:
                        self.log.info("  Book playlist: '%s'" % (pl.name()))
                    books = pl.file_tracks()
                    dev_books = pl.file_tracks()
                    break
            else:
                self.log.error("  book_playlist not found")

            for book in books:
            for book in dev_books:
                # This may need additional entries for international iTunes users
                if book.kind() in self.Audiobooks:
                    if DEBUG:
@ -64,7 +64,7 @@ class HANLINV3(USBMS):
        return names

    def linux_swap_drives(self, drives):
        if len(drives) < 2: return drives
        if len(drives) < 2 or not drives[1] or not drives[2]: return drives
        drives = list(drives)
        t = drives[0]
        drives[0] = drives[1]

@ -95,7 +95,6 @@ class HANLINV5(HANLINV3):
    gui_name = 'Hanlin V5'
    description = _('Communicate with Hanlin V5 eBook readers.')


    VENDOR_ID = [0x0492]
    PRODUCT_ID = [0x8813]
    BCD = [0x319]
@ -164,7 +164,7 @@ class APNXBuilder(object):
                if c == '/':
                    closing = True
                    continue
                elif c in ('d', 'p'):
                elif c == 'p':
                    if closing:
                        in_p = False
                    else:
@ -38,7 +38,7 @@ class KOBO(USBMS):

    VENDOR_ID = [0x2237]
    PRODUCT_ID = [0x4161]
    BCD = [0x0110]
    BCD = [0x0110, 0x0323]

    VENDOR_NAME = ['KOBO_INC', 'KOBO']
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['.KOBOEREADER', 'EREADER']
@ -68,9 +68,9 @@ class USER_DEFINED(USBMS):
            'is prepended to any send_to_device template') + '</p>',
    ]
    EXTRA_CUSTOMIZATION_DEFAULT = [
            '0x0000',
            '0x0000',
            '0x0000',
            '0xffff',
            '0xffff',
            '0xffff',
            None,
            '',
            '',
@ -68,7 +68,8 @@ def check_command_line_options(parser, args, log):
        raise SystemExit(1)

    output = args[2]
    if output.startswith('.') and output != '.':
    if output.startswith('.') and (output != '.' and not
            output.startswith('..')):
        output = os.path.splitext(os.path.basename(input))[0]+output
    output = os.path.abspath(output)
@ -103,10 +103,11 @@ class EPUBInput(InputFormatPlugin):
            t.set('href', guide_cover)
            t.set('title', 'Title Page')
            from calibre.ebooks import render_html_svg_workaround
            renderer = render_html_svg_workaround(guide_cover, log)
            if renderer is not None:
                open('calibre_raster_cover.jpg', 'wb').write(
                        renderer)
            if os.path.exists(guide_cover):
                renderer = render_html_svg_workaround(guide_cover, log)
                if renderer is not None:
                    open('calibre_raster_cover.jpg', 'wb').write(
                            renderer)

    def find_opf(self):
        def attr(n, attr):
@ -7,7 +7,6 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import os
import posixpath

from calibre import guess_type, walk
from calibre.customize.conversion import InputFormatPlugin

@ -74,22 +73,23 @@ class HTMLZInput(InputFormatPlugin):
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        # Get the cover path from the OPF.
        cover_href = None
        cover_path = None
        opf = None
        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.opf'):
                opf = x
                break
        if opf:
            opf = OPF(opf)
            cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
            opf = OPF(opf, basedir=os.getcwd())
            cover_path = opf.raster_cover
        # Set the cover.
        if cover_href:
        if cover_path:
            cdata = None
            with open(cover_href, 'rb') as cf:
            with open(os.path.join(os.getcwd(), cover_path), 'rb') as cf:
                cdata = cf.read()
            id, href = oeb.manifest.generate('cover', cover_href)
            oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
            cover_name = os.path.basename(cover_path)
            id, href = oeb.manifest.generate('cover', cover_name)
            oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata)
            oeb.guide.add('cover', 'Cover', href)

        return oeb
|
||||
return of.name
|
||||
|
||||
def get_comic_book_info(d, mi):
|
||||
# See http://code.google.com/p/comicbookinfo/wiki/Example
|
||||
series = d.get('series', '')
|
||||
if series.strip():
|
||||
mi.series = series
|
||||
@ -111,6 +112,7 @@ def get_comic_book_info(d, mi):
|
||||
|
||||
|
||||
def get_cbz_metadata(stream):
|
||||
# See http://code.google.com/p/comicbookinfo/wiki/Example
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
import json
|
||||
|
@ -112,10 +112,15 @@ class Metadata(object):

        Be careful with numeric fields since this will return True for zero as
        well as None.

        Also returns True if the field does not exist.
        '''
        null_val = NULL_VALUES.get(field, None)
        val = getattr(self, field, None)
        return not val or val == null_val
        try:
            null_val = NULL_VALUES.get(field, None)
            val = getattr(self, field, None)
            return not val or val == null_val
        except:
            return True

    def __getattribute__(self, field):
        _data = object.__getattribute__(self, '_data')
@ -8,12 +8,11 @@ Read meta information from extZ (TXTZ, HTMLZ...) files.
'''

import os
import posixpath

from cStringIO import StringIO

from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile, safe_replace

@ -31,9 +30,9 @@ def get_metadata(stream, extract_cover=True):
        opf = OPF(opf_stream)
        mi = opf.to_book_metadata()
        if extract_cover:
            cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
            cover_href = opf.raster_cover
            if cover_href:
                mi.cover_data = ('jpg', zf.read(cover_href))
                mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
    except:
        return mi
    return mi

@ -59,18 +58,15 @@ def set_metadata(stream, mi):
        except:
            pass
        if new_cdata:
            cover = opf.cover
            if not cover:
                cover = 'cover.jpg'
            cpath = posixpath.join(posixpath.dirname(opf_path), cover)
            cpath = opf.raster_cover
            if not cpath:
                cpath = 'cover.jpg'
            new_cover = _write_new_cover(new_cdata, cpath)
            replacements[cpath] = open(new_cover.name, 'rb')
            mi.cover = cover
            mi.cover = cpath

    # Update the metadata.
    old_mi = opf.to_book_metadata()
    old_mi.smart_update(mi)
    opf.smart_update(metadata_to_opf(old_mi))
    opf.smart_update(mi, replace_metadata=True)
    newopf = StringIO(opf.render())
    safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
@ -16,7 +16,7 @@ from lxml.html import soupparser, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata.book.base import Metadata

@ -37,6 +37,92 @@ class Worker(Thread): # Get details {{{
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.amazon_id = self.isbn = None
        self.domain = self.plugin.domain

        months = {
            'de': {
                1 : ['jän'],
                3 : ['märz'],
                5 : ['mai'],
                6 : ['juni'],
                7 : ['juli'],
                10: ['okt'],
                12: ['dez']
            },
            'it': {
                1: ['enn'],
                2: ['febbr'],
                5: ['magg'],
                6: ['giugno'],
                7: ['luglio'],
                8: ['ag'],
                9: ['sett'],
                10: ['ott'],
                12: ['dic'],
            },
            'fr': {
                1: ['janv'],
                2: ['févr'],
                3: ['mars'],
                4: ['avril'],
                5: ['mai'],
                6: ['juin'],
                7: ['juil'],
                8: ['août'],
                9: ['sept'],
                12: ['déc'],
            },

        }

        self.english_months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        self.months = months.get(self.domain, {})

        self.pd_xpath = '''
            //h2[text()="Product Details" or \
                 text()="Produktinformation" or \
                 text()="Dettagli prodotto" or \
                 text()="Product details" or \
                 text()="Détails sur le produit"]/../div[@class="content"]
            '''
        self.publisher_xpath = '''
            descendant::*[starts-with(text(), "Publisher:") or \
                          starts-with(text(), "Verlag:") or \
                          starts-with(text(), "Editore:") or \
                          starts-with(text(), "Editeur")]
            '''
        self.language_xpath = '''
            descendant::*[
                starts-with(text(), "Language:") \
                or text() = "Language" \
                or text() = "Sprache:" \
                or text() = "Lingua:" \
                or starts-with(text(), "Langue") \
                ]
            '''
        self.ratings_pat = re.compile(
            r'([0-9.]+) (out of|von|su|étoiles sur) (\d+)( (stars|Sternen|stelle)){0,1}')

        lm = {
            'en': ('English', 'Englisch'),
            'fr': ('French', 'Français'),
            'it': ('Italian', 'Italiano'),
            'de': ('German', 'Deutsch'),
        }
        self.lang_map = {}
        for code, names in lm.iteritems():
            for name in names:
                self.lang_map[name] = code

    def delocalize_datestr(self, raw):
        if not self.months:
            return raw
        ans = raw.lower()
        for i, vals in self.months.iteritems():
            for x in vals:
                ans = ans.replace(x, self.english_months[i])
        return ans
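        # Illustrative example based on the 'de' table above:
        # delocalize_datestr(u'1. März 2011') -> u'1. Mar 2011', a form the
        # generic English date parser used by parse_pubdate() can handle.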
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
@ -132,7 +218,7 @@ class Worker(Thread): # Get details {{{
|
||||
self.log.exception('Error parsing cover for url: %r'%self.url)
|
||||
mi.has_cover = bool(self.cover_url)
|
||||
|
||||
pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]')
|
||||
pd = root.xpath(self.pd_xpath)
|
||||
if pd:
|
||||
pd = pd[0]
|
||||
|
||||
@ -194,30 +280,42 @@ class Worker(Thread): # Get details {{{
|
||||
def parse_authors(self, root):
|
||||
x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
|
||||
aname = root.xpath(x)
|
||||
if not aname:
|
||||
aname = root.xpath('''
|
||||
//h1[@class="parseasinTitle"]/following-sibling::*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]
|
||||
''')
|
||||
for x in aname:
|
||||
x.tail = ''
|
||||
authors = [tostring(x, encoding=unicode, method='text').strip() for x
|
||||
in aname]
|
||||
authors = [a for a in authors if a]
|
||||
return authors
|
||||
|
||||
def parse_rating(self, root):
|
||||
ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
|
||||
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
|
||||
if not ratings:
|
||||
ratings = root.xpath('//div[@class="buying"]/descendant::span[@class="asinReviewsSummary"]')
|
||||
if not ratings:
|
||||
ratings = root.xpath('//span[@class="crAvgStars"]/descendant::span[@class="asinReviewsSummary"]')
|
||||
if ratings:
|
||||
for elem in ratings[0].xpath('descendant::*[@title]'):
|
||||
t = elem.get('title').strip()
|
||||
m = pat.match(t)
|
||||
m = self.ratings_pat.match(t)
|
||||
if m is not None:
|
||||
return float(m.group(1))/float(m.group(2)) * 5
|
||||
return float(m.group(1))/float(m.group(3)) * 5
|
||||
|
||||
def parse_comments(self, root):
|
||||
desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
|
||||
if desc:
|
||||
desc = desc[0]
|
||||
for c in desc.xpath('descendant::*[@class="seeAll" or'
|
||||
' @class="emptyClear" or @href]'):
|
||||
' @class="emptyClear"]'):
|
||||
c.getparent().remove(c)
|
||||
for a in desc.xpath('descendant::a[@href]'):
|
||||
del a.attrib['href']
|
||||
a.tag = 'span'
|
||||
desc = tostring(desc, method='html', encoding=unicode).strip()
|
||||
|
||||
# Encoding bug in Amazon data U+fffd (replacement char)
|
||||
# in some examples it is present in place of '
|
||||
desc = desc.replace('\ufffd', "'")
|
||||
@ -246,41 +344,44 @@ class Worker(Thread): # Get details {{{
|
||||
return ('/'.join(parts[:-1]))+'/'+bn
|
||||
|
||||
def parse_isbn(self, pd):
|
||||
for x in reversed(pd.xpath(
|
||||
'descendant::*[starts-with(text(), "ISBN")]')):
|
||||
items = pd.xpath(
|
||||
'descendant::*[starts-with(text(), "ISBN")]')
|
||||
if not items:
|
||||
items = pd.xpath(
|
||||
'descendant::b[contains(text(), "ISBN:")]')
|
||||
for x in reversed(items):
|
||||
if x.tail:
|
||||
ans = check_isbn(x.tail.strip())
|
||||
if ans:
|
||||
return ans
|
||||
|
||||
def parse_publisher(self, pd):
|
||||
for x in reversed(pd.xpath(
|
||||
'descendant::*[starts-with(text(), "Publisher:")]')):
|
||||
for x in reversed(pd.xpath(self.publisher_xpath)):
|
||||
if x.tail:
|
||||
ans = x.tail.partition(';')[0]
|
||||
return ans.partition('(')[0].strip()
|
||||
|
||||
def parse_pubdate(self, pd):
|
||||
for x in reversed(pd.xpath(
|
||||
'descendant::*[starts-with(text(), "Publisher:")]')):
|
||||
for x in reversed(pd.xpath(self.publisher_xpath)):
|
||||
if x.tail:
|
||||
ans = x.tail
|
||||
date = ans.partition('(')[-1].replace(')', '').strip()
|
||||
date = self.delocalize_datestr(date)
|
||||
return parse_date(date, assume_utc=True)
|
||||
|
||||
def parse_language(self, pd):
|
||||
for x in reversed(pd.xpath(
|
||||
'descendant::*[starts-with(text(), "Language:")]')):
|
||||
for x in reversed(pd.xpath(self.language_xpath)):
|
||||
if x.tail:
|
||||
ans = x.tail.strip()
|
||||
if ans == 'English':
|
||||
return 'en'
|
||||
ans = self.lang_map.get(ans, None)
|
||||
if ans:
|
||||
return ans
|
||||
# }}}
|
||||
|
||||
class Amazon(Source):
|
||||
|
||||
name = 'Amazon.com'
|
||||
description = _('Downloads metadata from Amazon')
|
||||
description = _('Downloads metadata and covers from Amazon')
|
||||
|
||||
capabilities = frozenset(['identify', 'cover'])
|
||||
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
|
||||
@ -294,8 +395,15 @@ class Amazon(Source):
            'fr' : _('France'),
            'de' : _('Germany'),
            'uk' : _('UK'),
            'it' : _('Italy'),
            }

    options = (
            Option('domain', 'choices', 'com', _('Amazon website to use:'),
                _('Metadata from Amazon will be fetched using this '
                    'country\'s Amazon website.'), choices=AMAZON_DOMAINS),
            )

    def get_book_url(self, identifiers): # {{{
        asin = identifiers.get('amazon', None)
        if asin is None:
@ -304,8 +412,16 @@ class Amazon(Source):
            return ('amazon', asin, 'http://amzn.com/%s'%asin)
    # }}}

    @property
    def domain(self):
        domain = self.prefs['domain']
        if domain not in self.AMAZON_DOMAINS:
            domain = 'com'

        return domain

    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        domain = self.prefs.get('domain', 'com')
        domain = self.domain

        # See the amazon detailed search page to get all options
        q = { 'search-alias' : 'aps',
@ -338,13 +454,15 @@ class Amazon(Source):
            q['field-author'] = ' '.join(author_tokens)

        if not ('field-keywords' in q or 'field-isbn' in q or
                ('field-title' in q and 'field-author' in q)):
                ('field-title' in q)):
            # Insufficient metadata to make an identify query
            return None

        latin1q = dict([(x.encode('latin1', 'ignore'), y.encode('latin1',
            'ignore')) for x, y in
            q.iteritems()])
        if domain == 'uk':
            domain = 'co.uk'
        url = 'http://www.amazon.%s/s/?'%domain + urlencode(latin1q)
        return url
@ -516,11 +634,19 @@ if __name__ == '__main__': # tests {{{
    # src/calibre/ebooks/metadata/sources/amazon.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test)
    test_identify_plugin(Amazon.name,
        [
    com_tests = [ # {{{

            ( # An e-book ISBN not on Amazon, one of the authors is
              # unknown to Amazon, so no popup wrapper
            ( # Description has links
                {'identifiers':{'isbn': '9780671578275'}},
                [title_test('A Civil Campaign: A Comedy of Biology and Manners',
                    exact=True), authors_test(['Lois McMaster Bujold'])
                ]
            ),

            ( # An e-book ISBN not on Amazon, the title/author search matches
              # the Kindle edition, which has different markup for ratings and
              # isbn
                {'identifiers':{'isbn': '9780307459671'},
                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
                [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
@ -556,6 +682,38 @@ if __name__ == '__main__': # tests {{{
            ),

        ])
    ] # }}}

    de_tests = [ # {{{
            (
                {'identifiers':{'isbn': '3548283519'}},
                [title_test('Wer Wind sät',
                    exact=True), authors_test(['Nele Neuhaus'])
                ]
            ),
    ] # }}}

    it_tests = [ # {{{
            (
                {'identifiers':{'isbn': '8838922195'}},
                [title_test('La briscola in cinque',
                    exact=True), authors_test(['Marco Malvaldi'])
                ]
            ),
    ] # }}}

    fr_tests = [ # {{{
            (
                {'identifiers':{'isbn': '2221116798'}},
                [title_test('L\'étrange voyage de Monsieur Daldry',
                    exact=True), authors_test(['Marc Levy'])
                ]
            ),
    ] # }}}

    test_identify_plugin(Amazon.name, com_tests)
# }}}
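Note: only com_tests is actually executed above; the de/it/fr lists are defined
but unused. A minimal sketch (not part of the commit; helper name hypothetical)
of pushing the other lists through the same harness, assuming the 'domain'
preference is switched to the matching Amazon site before each run:

    def run_locale_tests(plugin_name, tests_by_domain):
        # tests_by_domain maps a domain code to a test list, e.g. {'de': de_tests}
        for domain, tests in tests_by_domain.items():
            test_identify_plugin(plugin_name, tests)

    # run_locale_tests(Amazon.name, {'de': de_tests, 'it': it_tests, 'fr': fr_tests})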
@ -145,10 +145,13 @@ class Option(object):
        :param default: The default value for this option
        :param label: A short (few words) description of this option
        :param desc: A longer description of this option
        :param choices: A list of possible values, used only if type='choices'
        :param choices: A dict of possible values, used only if type='choices'.
                        dict is of the form {key:human readable label, ...}
        '''
        self.name, self.type, self.default, self.label, self.desc = (name,
                type_, default, label, desc)
        if choices and not isinstance(choices, dict):
            choices = dict([(x, x) for x in choices])
        self.choices = choices

class Source(Plugin):
@ -212,6 +215,9 @@ class Source(Plugin):
    def is_customizable(self):
        return True

    def customization_help(self):
        return 'This plugin can only be customized using the GUI'

    def config_widget(self):
        from calibre.gui2.metadata.config import ConfigWidget
        return ConfigWidget(self)
@ -288,10 +294,10 @@ class Source(Plugin):
                parts = parts[1:] + parts[:1]
        for tok in parts:
            tok = remove_pat.sub('', tok).strip()
            if len(tok) > 2 and tok.lower() not in ('von', ):
            if len(tok) > 2 and tok.lower() not in ('von', 'van',
                    _('Unknown').lower()):
                yield tok


    def get_title_tokens(self, title, strip_joiners=True, strip_subtitle=False):
        '''
        Take a title and return a list of tokens useful for an AND search query.
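Note: with the normalization added to Option.__init__ above, either form of
choices is accepted; a short sketch (option names and values illustrative):

    # Both end up as a dict internally:
    opt_a = Option('domain', 'choices', 'com', 'Amazon website to use:',
            'Which site to query', choices={'com': 'US', 'de': 'Germany'})
    opt_b = Option('mode', 'choices', 'fast', 'Mode:', 'How to fetch',
            choices=['fast', 'slow'])  # becomes {'fast': 'fast', 'slow': 'slow'}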
src/calibre/ebooks/metadata/sources/douban.py (new file, 347 lines)
@ -0,0 +1,347 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>; 2011, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'

import time
from urllib import urlencode
from functools import partial
from Queue import Queue, Empty

from lxml import etree

from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars
from calibre import as_unicode

NAMESPACES = {
        'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
        'atom' : 'http://www.w3.org/2005/Atom',
        'db': 'http://www.douban.com/xmlns/',
        'gd': 'http://schemas.google.com/g/2005'
        }
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results  = XPath('//openSearch:totalResults')
start_index    = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry          = XPath('//atom:entry')
entry_id       = XPath('descendant::atom:id')
title          = XPath('descendant::atom:title')
description    = XPath('descendant::atom:summary')
publisher      = XPath("descendant::db:attribute[@name='publisher']")
isbn           = XPath("descendant::db:attribute[@name='isbn13']")
date           = XPath("descendant::db:attribute[@name='pubdate']")
creator        = XPath("descendant::db:attribute[@name='author']")
booktag        = XPath("descendant::db:tag/attribute::name")
rating         = XPath("descendant::gd:rating/attribute::average")
cover_url      = XPath("descendant::atom:link[@rel='image']/attribute::href")

def get_details(browser, url, timeout): # {{{
    try:
        raw = browser.open_novisit(url, timeout=timeout).read()
    except Exception as e:
        gc = getattr(e, 'getcode', lambda : -1)
        if gc() != 403:
            raise
        # Douban is throttling us, wait a little
        time.sleep(2)
        raw = browser.open_novisit(url, timeout=timeout).read()

    return raw
# }}}

def to_metadata(browser, log, entry_, timeout): # {{{
    def get_text(extra, x):
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    douban_id = id_url.split('/')[-1]
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    if not id_url or not title:
        # Silently discard this entry
        return None

    mi = Metadata(title_, authors)
    mi.identifiers = {'douban':douban_id}
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
            strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except:
        log.exception('Failed to get additional details for', mi.title)
        return mi
    mi.comments = get_text(extra, description)
    mi.publisher = get_text(extra, publisher)

    # ISBN
    isbns = []
    for x in [t.text for t in isbn(extra)]:
        if check_isbn(x):
            isbns.append(x)
    if isbns:
        mi.isbn = sorted(isbns, key=len)[-1]
    mi.all_isbns = isbns

    # Tags
    try:
        btags = [x for x in booktag(extra) if x]
        tags = []
        for t in btags:
            atags = [y.strip() for y in t.split('/')]
            for tag in atags:
                if tag not in tags:
                    tags.append(tag)
    except:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        try:
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except:
            log.error('Failed to parse pubdate %r'%pubdate)

    # Ratings
    if rating(extra):
        try:
            mi.rating = float(rating(extra)[0]) / 2.0
        except:
            log.exception('Failed to parse rating')
            mi.rating = 0

    # Cover
    mi.has_douban_cover = None
    u = cover_url(extra)
    if u:
        u = u[0].replace('/spic/', '/lpic/')
        # If URL contains "book-default", the book doesn't have a cover
        if u.find('book-default') == -1:
            mi.has_douban_cover = u
    return mi
# }}}

class Douban(Source):

    name = 'Douban Books'
    author = 'Li Fanxi'
    version = (2, 0, 0)

    description = _('Downloads metadata and covers from Douban.com')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags',
        'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating',
        'identifier:douban']) # language currently disabled
    supports_gzip_transfer_encoding = True
    cached_cover_url_is_reliable = True

    DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
    DOUBAN_BOOK_URL = 'http://book.douban.com/subject/%s/'

    def get_book_url(self, identifiers): # {{{
        db = identifiers.get('douban', None)
        if db is not None:
            return ('douban', db, self.DOUBAN_BOOK_URL%db)
    # }}}

    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        SEARCH_URL = 'http://api.douban.com/book/subjects?'
        ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
        SUBJECT_URL = 'http://api.douban.com/book/subject/'

        q = ''
        t = None
        isbn = check_isbn(identifiers.get('isbn', None))
        subject = identifiers.get('douban', None)
        if isbn is not None:
            q = isbn
            t = 'isbn'
        elif subject is not None:
            q = subject
            t = 'subject'
        elif title or authors:
            def build_term(prefix, parts):
                return ' '.join(x for x in parts)
            title_tokens = list(self.get_title_tokens(title))
            if title_tokens:
                q += build_term('title', title_tokens)
            author_tokens = self.get_author_tokens(authors,
                    only_first_author=True)
            if author_tokens:
                q += ((' ' if q != '' else '') +
                    build_term('author', author_tokens))
            t = 'search'
        q = q.strip()
        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None
        url = None
        if t == "isbn":
            url = ISBN_URL + q
        elif t == 'subject':
            url = SUBJECT_URL + q
        else:
            url = SEARCH_URL + urlencode({
                    'q': q,
                })
        if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
            url = url + "?apikey=" + self.DOUBAN_API_KEY
        return url
    # }}}

    def download_cover(self, log, result_queue, abort, # {{{
            title=None, authors=None, identifiers={}, timeout=30):
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            if cdata:
                result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)
    # }}}

    def get_cached_cover_url(self, identifiers): # {{{
        url = None
        db = identifiers.get('douban', None)
        if db is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                db = self.cached_isbn_to_identifier(isbn)
        if db is not None:
            url = self.cached_identifier_to_cover_url(db)

        return url
    # }}}

    def get_all_details(self, br, log, entries, abort, # {{{
            result_queue, timeout):
        for relevance, i in enumerate(entries):
            try:
                ans = to_metadata(br, log, i, timeout)
                if isinstance(ans, Metadata):
                    ans.source_relevance = relevance
                    db = ans.identifiers['douban']
                    for isbn in getattr(ans, 'all_isbns', []):
                        self.cache_isbn_to_identifier(isbn, db)
                    if ans.has_douban_cover:
                        self.cache_identifier_to_cover_url(db,
                                ans.has_douban_cover)
                    self.clean_downloaded_metadata(ans)
                    result_queue.put(ans)
            except:
                log.exception(
                    'Failed to get metadata for identify entry:',
                    etree.tostring(i))
            if abort.is_set():
                break
    # }}}

    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
            identifiers={}, timeout=30):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if not query:
            log.error('Insufficient metadata to construct query')
            return
        br = self.browser
        try:
            raw = br.open_novisit(query, timeout=timeout).read()
        except Exception as e:
            log.exception('Failed to make identify query: %r'%query)
            return as_unicode(e)
        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
                strip_encoding_pats=True)[0], parser=parser)
            entries = entry(feed)
        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)
        if not entries and identifiers and title and authors and \
                not abort.is_set():
            return self.identify(log, result_queue, abort, title=title,
                    authors=authors, timeout=timeout)

        # There is no point running these queries in threads as douban
        # throttles requests returning 403 Forbidden errors
        self.get_all_details(br, log, entries, abort, result_queue, timeout)

        return None
    # }}}

if __name__ == '__main__': # tests {{{
    # To run these tests, use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test)
    test_identify_plugin(Douban.name,
        [
            (
                {'identifiers':{'isbn': '9787536692930'}, 'title':'三体',
                    'authors':['刘慈欣']},
                [title_test('三体', exact=True),
                    authors_test(['刘慈欣'])]
            ),

            (
                {'title': 'Linux内核修炼之道', 'authors':['任桥伟']},
                [title_test('Linux内核修炼之道', exact=False)]
            ),
        ])
# }}}
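Note on mi.isbn = sorted(isbns, key=len)[-1] above: when Douban returns both
the ISBN-10 and ISBN-13 forms, the longest valid one (the ISBN-13) is
preferred. A quick check with illustrative values:

    isbns = ['7536692935', '9787536692930']
    print(sorted(isbns, key=len)[-1])  # -> '9787536692930'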
@ -157,7 +157,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
class GoogleBooks(Source):

    name = 'Google'
    description = _('Downloads metadata from Google Books')
    description = _('Downloads metadata and covers from Google Books')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',

@ -13,6 +13,7 @@ from Queue import Queue, Empty
from threading import Thread
from io import BytesIO
from operator import attrgetter
from urlparse import urlparse

from calibre.customize.ui import metadata_plugins, all_metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log, msprefs
@ -371,6 +372,18 @@ def identify(log, abort, # {{{
    longest, lp = -1, ''
    for plugin, presults in results.iteritems():
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))

        # Throw away lower priority results from the same source that have exactly the same
        # title and authors as a higher priority result
        filter_results = set()
        filtered_results = []
        for r in presults:
            key = (r.title, tuple(r.authors))
            if key not in filter_results:
                filtered_results.append(r)
                filter_results.add(key)
        results[plugin] = presults = filtered_results

        plog = logs[plugin].getvalue().strip()
        log('\n'+'*'*30, plugin.name, '*'*30)
        log('Request extra headers:', plugin.browser.addheaders)
@ -402,7 +415,7 @@ def identify(log, abort, # {{{
            result.identify_plugin = plugin
            if msprefs['txt_comments']:
                if plugin.has_html_comments and result.comments:
                    result.comments = html2text(r.comments)
                    result.comments = html2text(result.comments)

    log('The identify phase took %.2f seconds'%(time.time() - start_time))
    log('The longest time (%f) was taken by:'%longest, lp)
@ -458,6 +471,14 @@ def urls_from_identifiers(identifiers): # {{{
    if oclc:
        ans.append(('OCLC', 'oclc', oclc,
            'http://www.worldcat.org/oclc/'+oclc))
    url = identifiers.get('uri', None)
    if url is None:
        url = identifiers.get('url', None)
    if url and url.startswith('http'):
        url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
        parts = urlparse(url)
        name = parts.netloc
        ans.append((name, 'url', url, url))
    return ans
# }}}

@ -470,7 +491,7 @@ if __name__ == '__main__': # tests {{{
        (
            {'title':'Magykal Papers',
                'authors':['Sage']},
            [title_test('The Magykal Papers', exact=True)],
            [title_test('Septimus Heap: The Magykal Papers', exact=True)],
        ),


@ -497,12 +518,6 @@ if __name__ == '__main__': # tests {{{
                exact=True), authors_test(['Dan Brown'])]
        ),

        ( # No ISBN
            {'title':'Justine', 'authors':['Durrel']},
            [title_test('Justine', exact=True),
                authors_test(['Lawrence Durrel'])]
        ),

        ( # A newer book
            {'identifiers':{'isbn': '9780316044981'}},
            [title_test('The Heroes', exact=True),
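Note: the result filter added to identify() above is an order-preserving
"seen set" dedup. The same idea in isolation (names illustrative):

    def dedup_by_key(items, key):
        # Keep the first (highest priority) item for each key, in order.
        seen, out = set(), []
        for item in items:
            k = key(item)
            if k not in seen:
                seen.add(k)
                out.append(item)
        return out

    # dedup_by_key(presults, key=lambda r: (r.title, tuple(r.authors)))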
@ -30,7 +30,7 @@ base_url = 'http://search.overdrive.com/'
class OverDrive(Source):

    name = 'Overdrive'
    description = _('Downloads metadata from Overdrive\'s Content Reserve')
    description = _('Downloads metadata and covers from Overdrive\'s Content Reserve')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
@ -41,7 +41,7 @@ class OverDrive(Source):
    cached_cover_url_is_reliable = True

    options = (
            Option('get_full_metadata', 'bool', False,
            Option('get_full_metadata', 'bool', True,
                _('Download all metadata (slow)'),
                _('Enable this option to gather all metadata available from Overdrive.')),
            )
@ -191,7 +191,11 @@ class OEBReader(object):
            if not scheme and href not in known:
                new.add(href)
        elif item.media_type in OEB_STYLES:
            for url in cssutils.getUrls(item.data):
            try:
                urls = list(cssutils.getUrls(item.data))
            except:
                urls = []
            for url in urls:
                href, _ = urldefrag(url)
                href = item.abshref(urlnormalize(href))
                scheme = urlparse(href).scheme
@ -32,10 +32,11 @@ class PDFInput(InputFormatPlugin):

    def convert_new(self, stream, accelerators):
        from calibre.ebooks.pdf.reflow import PDFDocument
        from calibre.utils.cleantext import clean_ascii_chars
        if pdfreflow_err:
            raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
        pdfreflow.reflow(stream.read(), 1, -1)
        xml = open('index.xml', 'rb').read()
        xml = clean_ascii_chars(open('index.xml', 'rb').read())
        PDFDocument(xml, self.opts, self.log)
        return os.path.join(os.getcwd(), 'metadata.opf')
@ -86,7 +86,7 @@ class RTFInput(InputFormatPlugin):
            run_lev = 4
            self.log('Running RTFParser in debug mode')
        except:
            pass
            self.log.warn('Impossible to run RTFParser in debug mode')
        parser = ParseRtf(
            in_file = stream,
            out_file = ofile,
@ -197,8 +197,8 @@ class ProcessTokens:
        # character info => ci
        'b' : ('ci', 'bold______', self.bool_st_func),
        'blue' : ('ci', 'blue______', self.color_func),
        'caps' : ('ci', 'caps______', self.bool_st_func),
        'cf' : ('ci', 'font-color', self.default_func),
        'caps' : ('ci', 'caps______', self.bool_st_func),
        'cf' : ('ci', 'font-color', self.colorz_func),
        'chftn' : ('ci', 'footnot-mk', self.bool_st_func),
        'dn' : ('ci', 'font-down_', self.divide_by_2),
        'embo' : ('ci', 'emboss____', self.bool_st_func),
@ -624,6 +624,11 @@ class ProcessTokens:
            num = 'true'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def colorz_func(self, pre, token, num):
        if num is None:
            num = '0'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def __list_type_func(self, pre, token, num):
        type = 'arabic'
        if num is None:
@ -12,7 +12,7 @@ A Humane Web Text Generator
#__date__ = '2009/12/04'

__copyright__ = """
Copyright (c) 2011, Leigh Parry
Copyright (c) 2011, Leigh Parry <leighparry@blueyonder.co.uk>
Copyright (c) 2011, John Schember <john@nachtimwald.com>
Copyright (c) 2009, Jason Samsa, http://jsamsa.com/
Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
@ -219,14 +219,13 @@ class Textile(object):
    ]
    glyph_defaults = [
        (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
        (re.compile(r'(\d+)\'', re.I), r'\1′'), # prime
        (re.compile(r'(\d+)\"', re.I), r'\1″'), # prime-double
        (re.compile(r'(\d+)\'(\s)', re.I), r'\1′\2'), # prime
        (re.compile(r'(\d+)\"(\s)', re.I), r'\1″\2'), # prime-double
        (re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
        (re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
        (re.compile(r'\b(\s{0,1})?\.{3}'), r'\1…'), # ellipsis
        (re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
        (re.compile(r'\b--\b'), r'—'), # em dash
        (re.compile(r'(\s)--(\s)'), r'\1—\2'), # em dash
        (re.compile(r'(^|[^-])--([^-]|$)'), r'\1—\2'), # em dash
        (re.compile(r'\s-(?:\s|$)'), r' – '), # en dash
        (re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1™'), # trademark
        (re.compile(r'\b( ?)[([]R[])]', re.I), r'\1®'), # registered
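Note: the consolidated em-dash rule also catches '--' at a line edge, which
the old whitespace-delimited pair missed. A quick check (sample invented):

    import re
    old = re.compile(r'(\s)--(\s)')
    new = re.compile(r'(^|[^-])--([^-]|$)')
    s = u'--ready or not'
    print(old.sub(u'\\1\u2014\\2', s))  # unchanged: no surrounding whitespace
    print(new.sub(u'\\1\u2014\\2', s))  # em dash substituted at the line start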
@ -706,6 +705,21 @@ class Textile(object):
            result.append(line)
        return ''.join(result)

    def macros_only(self, text):
        # fix: hackish
        text = re.sub(r'"\Z', '\" ', text)

        result = []
        for line in re.compile(r'(<.*?>)', re.U).split(text):
            if not re.search(r'<.*>', line):
                rules = []
                if re.search(r'{.+?}', line):
                    rules = self.macro_defaults
                for s, r in rules:
                    line = s.sub(r, line)
            result.append(line)
        return ''.join(result)

    def vAlign(self, input):
        d = {'^':'top', '-':'middle', '~':'bottom'}
        return d.get(input, '')
@ -814,6 +828,7 @@ class Textile(object):
        'fooobar ... and hello world ...'
        """

        text = self.macros_only(text)
        punct = '!"#$%&\'*+,-./:;=?@\\^_`|~'

        pattern = r'''
@ -1044,4 +1059,3 @@ def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
    return Textile(restricted=True, lite=lite,
            noimage=noimage).textile(text, rel='nofollow',
            html_type=html_type)
@ -66,19 +66,26 @@ class TXTOutput(OutputFormatPlugin):
                help=_('Do not remove image references within the document. This is only ' \
                    'useful when paired with a txt-output-formatting option that '
                    'is not none because links are always removed with plain text output.')),
        OptionRecommendation(name='keep_color',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Do not remove font color from output. This is only useful when ' \
                'txt-output-formatting is set to textile. Textile is the only ' \
                'formatting that supports setting font color. If this option is ' \
                'not specified font color will not be set and default to the ' \
                'color displayed by the reader (generally this is black).')),
        ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        if opts.txt_output_formatting.lower() == 'markdown':
            from calibre.ebooks.txt.markdownml import MarkdownMLizer
            writer = MarkdownMLizer(log)
            self.writer = MarkdownMLizer(log)
        elif opts.txt_output_formatting.lower() == 'textile':
            from calibre.ebooks.txt.textileml import TextileMLizer
            writer = TextileMLizer(log)
            self.writer = TextileMLizer(log)
        else:
            writer = TXTMLizer(log)
            self.writer = TXTMLizer(log)

        txt = writer.extract_content(oeb_book, opts)
        txt = self.writer.extract_content(oeb_book, opts)
        txt = clean_ascii_chars(txt)

        log.debug('\tReplacing newlines with selected type...')
@ -111,17 +118,28 @@ class TXTZOutput(TXTOutput):
        from calibre.ebooks.oeb.base import OEB_IMAGES
        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
            with TemporaryFile('index.txt') as tf:
            txt_name = 'index.txt'
            if opts.txt_output_formatting.lower() == 'textile':
                txt_name = 'index.text'
            with TemporaryFile(txt_name) as tf:
                TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
                shutil.copy(tf, os.path.join(tdir, 'index.txt'))
                shutil.copy(tf, os.path.join(tdir, txt_name))

            # Images
            for item in oeb_book.manifest:
                if item.media_type in OEB_IMAGES:
                    path = os.path.join(tdir, os.path.dirname(item.href))
                    if hasattr(self.writer, 'images'):
                        path = os.path.join(tdir, 'images')
                        if item.href in self.writer.images:
                            href = self.writer.images[item.href]
                        else:
                            continue
                    else:
                        path = os.path.join(tdir, os.path.dirname(item.href))
                        href = os.path.basename(item.href)
                    if not os.path.exists(path):
                        os.makedirs(path)
                    with open(os.path.join(tdir, item.href), 'wb') as imgf:
                    with open(os.path.join(path, href), 'wb') as imgf:
                        imgf.write(item.data)

            # Metadata
@ -242,6 +242,8 @@ def detect_formatting_type(txt):
    textile_count += len(re.findall(r'(?mu)(?<=\!)\S+(?=\!)', txt))
    # Links
    textile_count += len(re.findall(r'"[^"]*":\S+', txt))
    # paragraph blocks
    textile_count += len(re.findall(r'(?mu)^p(<|<>|=|>)?\. ', txt))

    # Decide if either markdown or textile is used in the text
    # based on the number of unique formatting elements found.
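Note: the new heuristic counts Textile block signatures such as 'p. ' and
'p<. ' at line starts. A quick check (sample invented):

    import re
    sample = u'p. A plain paragraph\np<. A left-aligned one\nnot a block\n'
    print(len(re.findall(r'(?mu)^p(<|<>|=|>)?\. ', sample)))  # -> 2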
@ -1,62 +1,489 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
__docformat__ = 'restructuredtext en'

'''
Transform OEB content into Textile formatted plain text
'''

import re

from lxml import etree
from functools import partial

from calibre.ebooks.oeb.base import XHTML
from calibre.utils.html2textile import html2textile
from calibre.ebooks.htmlz.oeb2html import OEB2HTML
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, rewrite_links
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks import unit_convert
from calibre.ebooks.txt.unsmarten import unsmarten

class TextileMLizer(object):

    def __init__(self, log):
        self.log = log
class TextileMLizer(OEB2HTML):

    def extract_content(self, oeb_book, opts):
        self.log.info('Converting XHTML to Textile formatted TXT...')
        self.oeb_book = oeb_book
        self.opts = opts
        self.in_pre = False
        self.in_table = False
        self.links = {}
        self.list = []
        self.our_links = []
        self.in_a_link = False
        self.our_ids = []
        self.images = {}
        self.id_no_text = u''
        self.style_embed = []
        self.remove_space_after_newline = False
        self.base_hrefs = [item.href for item in oeb_book.spine]
        self.map_resources(oeb_book)

        return self.mlize_spine()
        self.style_bold = False
        self.style_italic = False
        self.style_under = False
        self.style_strike = False
        self.style_smallcap = False

    def mlize_spine(self):
        txt = self.mlize_spine(oeb_book)
        txt = unsmarten(txt)

        # Do some tidying up
        txt = self.tidy_up(txt)

        return txt

    def mlize_spine(self, oeb_book):
        output = [u'']

        for item in self.oeb_book.spine:
        for item in oeb_book.spine:
            self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
            self.rewrite_ids(item.data, item)
            rewrite_links(item.data, partial(self.rewrite_link, page=item))
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts, self.opts.output_profile)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
            output.append('\n\n')
        return ''.join(output)

            html = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
    def tidy_up(self, text):
        # May need tweaking and fine-tuning
        def check_escaping(text, tests):
            for t in tests:
                # I'm not checking for duplicated spans '%' as any that follow each other were being incorrectly merged
                txt = '%s' % t
                if txt != '%':
                    text = re.sub(r'([^'+t+'|^\n])'+t+'\]\['+t+'([^'+t+'])', r'\1\2', text)
                    text = re.sub(r'([^'+t+'|^\n])'+t+t+'([^'+t+'])', r'\1\2', text)
                text = re.sub(r'(\s|[*_\'"])\[('+t+'[a-zA-Z0-9 \'",.*_]+'+t+')\](\s|[*_\'"?!,.])', r'\1\2\3', text)
            return text

            if not self.opts.keep_links:
                html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
            if not self.opts.keep_image_references:
                html = re.sub(r'<\s*img[^>]*>', '', html)
        # Now tidy up links and ids - remove ones that don't have a corresponding opposite
        if self.opts.keep_links:
            for i in self.our_links:
                if i[0] == '#':
                    if i not in self.our_ids:
                        text = re.sub(r'"(.+)":'+i+'(\s)', r'\1\2', text)
            for i in self.our_ids:
                if i not in self.our_links:
                    text = re.sub(r'%?\('+i+'\)\xa0?%?', r'', text)

        # Remove obvious non-needed escaping, add sub/sup-script ones
        text = check_escaping(text, ['\*', '_', '\*'])
        # escape the super/sub-scripts if needed
        text = re.sub(r'(\w)([~^]\w+[~^])', r'\1[\2]', text)
        # escape the super/sub-scripts if needed
        text = re.sub(r'([~^]\w+[~^])(\w)', r'[\1]\2', text)

            text = html2textile(html)
        #remove empty spans
        text = re.sub(r'%\xa0+', r'%', text)
        #remove empty spans - MAY MERGE SOME ?
        text = re.sub(r'%%', r'', text)
        #remove spans from tagged output
        text = re.sub(r'%([_+*-]+)%', r'\1', text)
        #remove spaces before a newline
        text = re.sub(r' +\n', r'\n', text)
        #remove newlines at top of file
        text = re.sub(r'^\n+', r'', text)
        #correct blockcode paras
        text = re.sub(r'\npre\.\n?\nbc\.', r'\nbc.', text)
        #correct blockquote paras
        text = re.sub(r'\nbq\.\n?\np.*\. ', r'\nbq. ', text)

            # Ensure the section ends with at least two new line characters.
            # This is to prevent the last paragraph from a section being
            # combined into the first paragraph of the next.
            end_chars = text[-4:]
            # Convert all newlines to \n
            end_chars = end_chars.replace('\r\n', '\n')
            end_chars = end_chars.replace('\r', '\n')
            end_chars = end_chars[-2:]
            if not end_chars[1] == '\n':
                text += '\n\n'
            if end_chars[1] == '\n' and not end_chars[0] == '\n':
                text += '\n'
        #reduce blank lines
        text = re.sub(r'\n{3}', r'\n\np. \n\n', text)
        text = re.sub(u'%\n(p[<>=]{1,2}\.|p\.)', r'%\n\n\1', text)
        #Check span following blank para
        text = re.sub(r'\n+ +%', r' %', text)
        text = re.sub(u'p[<>=]{1,2}\.\n\n?', r'', text)
        # blank paragraph
        text = re.sub(r'\n(p.*\.)\n', r'\n\1 \n\n', text)
        # blank paragraph
        text = re.sub(u'\n\xa0', r'\np. ', text)
        # blank paragraph
        text = re.sub(u'\np[<>=]{1,2}?\. \xa0', r'\np. ', text)
        text = re.sub(r'(^|\n)(p.*\. ?\n)(p.*\.)', r'\1\3', text)
        text = re.sub(r'\n(p\. \n)(p.*\.|h.*\.)', r'\n\2', text)
        #sort out spaces in tables
        text = re.sub(r' {2,}\|', r' |', text)

            output += text
        # Now put back spaces removed earlier as they're needed here
        text = re.sub(r'\np\.\n', r'\np. \n', text)
        #reduce blank lines
        text = re.sub(r' \n\n\n', r' \n\n', text)

        output = u''.join(output)
        return text

        return output
    def remove_newlines(self, text):
        text = text.replace('\r\n', ' ')
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')
        # Condense redundant spaces created by replacing newlines with spaces.
        text = re.sub(r'[ ]{2,}', ' ', text)
        text = re.sub(r'\t+', '', text)
        if self.remove_space_after_newline == True:
            text = re.sub(r'^ +', '', text)
            self.remove_space_after_newline = False
        return text

    def check_styles(self, style):
        txt = '{'
        if self.opts.keep_color:
            if 'color' in style.cssdict() and style['color'] != 'black':
                txt += 'color:'+style['color']+';'
            if 'background' in style.cssdict():
                txt += 'background:'+style['background']+';'
        txt += '}'
        if txt == '{}': txt = ''
        return txt

    def check_halign(self, style):
        tests = {'left':'<','justify':'<>','center':'=','right':'>'}
        for i in tests:
            if style['text-align'] == i:
                return tests[i]
        return ''

    def check_valign(self, style):
        tests = {'top':'^','bottom':'~'} #, 'middle':'-'}
        for i in tests:
            if style['vertical-align'] == i:
                return tests[i]
        return ''

    def check_padding(self, style, stylizer):
        txt = ''
        left_padding_pts = 0
        left_margin_pts = 0
        if 'padding-left' in style.cssdict() and style['padding-left'] != 'auto':
            left_padding_pts = unit_convert(style['padding-left'], style.width, style.fontSize, stylizer.profile.dpi)
        if 'margin-left' in style.cssdict() and style['margin-left'] != 'auto':
            left_margin_pts = unit_convert(style['margin-left'], style.width, style.fontSize, stylizer.profile.dpi)
        left = left_margin_pts + left_padding_pts
        emleft = int(round(left / stylizer.profile.fbase))
        if emleft >= 1:
            txt += '(' * emleft
        right_padding_pts = 0
        right_margin_pts = 0
        if 'padding-right' in style.cssdict() and style['padding-right'] != 'auto':
            right_padding_pts = unit_convert(style['padding-right'], style.width, style.fontSize, stylizer.profile.dpi)
        if 'margin-right' in style.cssdict() and style['margin-right'] != 'auto':
            right_margin_pts = unit_convert(style['margin-right'], style.width, style.fontSize, stylizer.profile.dpi)
        right = right_margin_pts + right_padding_pts
        emright = int(round(right / stylizer.profile.fbase))
        if emright >= 1:
            txt += ')' * emright

        return txt

    def check_id_tag(self, attribs):
        txt = ''
        if attribs.has_key('id'):
            txt = '(#'+attribs['id']+ ')'
            self.our_ids.append('#'+attribs['id'])
            self.id_no_text = u'\xa0'
        return txt

    def build_block(self, tag, style, attribs, stylizer):
        txt = '\n' + tag
        if self.opts.keep_links:
            txt += self.check_id_tag(attribs)
        txt += self.check_padding(style, stylizer)
        txt += self.check_halign(style)
        txt += self.check_styles(style)
        return txt

    def prepare_string_for_textile(self, txt):
        if re.search(r'(\s([*&_+\-~@%|]|\?{2})\S)|(\S([*&_+\-~@%|]|\?{2})\s)', txt):
            return ' ==%s== ' % txt
        return txt

    def dump_text(self, elem, stylizer):
        '''
        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element.
        '''

        # We can only process tags. If there isn't a tag return any text.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            p = elem.getparent()
            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
                    and elem.tail:
                return [elem.tail]
            return ['']

        # Setup our variables.
        text = ['']
        style = stylizer.style(elem)
        tags = []
        tag = barename(elem.tag)
        attribs = elem.attrib

        # Ignore anything that is set to not be displayed.
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return ['']

        # Soft scene breaks.
        if 'margin-top' in style.cssdict() and style['margin-top'] != 'auto':
            ems = int(round(float(style.marginTop) / style.fontSize) - 1)
            if ems >= 1:
                text.append(u'\n\n\xa0' * ems)

        if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div'):
            if tag == 'div':
                tag = 'p'
            text.append(self.build_block(tag, style, attribs, stylizer))
            text.append('. ')
            tags.append('\n')

        if style['font-style'] == 'italic' or tag in ('i', 'em'):
            if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite'):
                if self.style_italic == False:
                    if self.in_a_link:
                        text.append('_')
                        tags.append('_')
                    else:
                        text.append('[_')
                        tags.append('_]')
                    self.style_embed.append('_')
                    self.style_italic = True
        if style['font-weight'] in ('bold', 'bolder') or tag in ('b', 'strong'):
            if tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'th'):
                if self.style_bold == False:
                    if self.in_a_link:
                        text.append('*')
                        tags.append('*')
                    else:
                        text.append('[*')
                        tags.append('*]')
                    self.style_embed.append('*')
                    self.style_bold = True
        if style['text-decoration'] == 'underline' or tag in ('u', 'ins'):
            if tag != 'a':
                if self.style_under == False:
                    text.append('[+')
                    tags.append('+]')
                    self.style_embed.append('+')
                    self.style_under = True
        if style['text-decoration'] == 'line-through' or tag in ('strike', 'del', 's'):
            if self.style_strike == False:
                text.append('[-')
                tags.append('-]')
                self.style_embed.append('-')
                self.style_strike = True
        if tag == 'br':
            for i in reversed(self.style_embed):
                text.append(i)
            text.append('\n')
            for i in self.style_embed:
                text.append(i)
            tags.append('')
            self.remove_space_after_newline = True
        if tag == 'blockquote':
            text.append('\nbq. ')
            tags.append('\n')
        elif tag in ('abbr', 'acronym'):
            text.append('')
            txt = attribs['title']
            tags.append('(' + txt + ')')
        elif tag == 'sup':
            text.append('^')
            tags.append('^')
        elif tag == 'sub':
            text.append('~')
            tags.append('~')
        elif tag == 'code':
            if self.in_pre:
                text.append('\nbc. ')
                tags.append('')
            else:
                text.append('@')
                tags.append('@')
        elif tag == 'cite':
            text.append('??')
            tags.append('??')
        elif tag == 'hr':
            text.append('\n***')
            tags.append('\n')
        elif tag == 'pre':
            self.in_pre = True
            text.append('\npre. ')
            tags.append('pre\n')
        elif tag == 'a':
            if self.opts.keep_links:
                if attribs.has_key('href'):
                    text.append('"')
                    tags.append('a')
                    tags.append('":' + attribs['href'])
                    self.our_links.append(attribs['href'])
                    if attribs.has_key('title'):
                        tags.append('(' + attribs['title'] + ')')
                    self.in_a_link = True
                else:
                    text.append('%')
                    tags.append('%')
        elif tag == 'img':
            if self.opts.keep_image_references:
                txt = '!' + self.check_halign(style)
                txt += self.check_valign(style)
                txt += attribs['src']
                text.append(txt)
                if attribs.has_key('alt'):
                    txt = attribs['alt']
                    if txt != '':
                        text.append('(' + txt + ')')
                tags.append('!')
        elif tag in ('ol', 'ul'):
            self.list.append({'name': tag, 'num': 0})
            text.append('')
            tags.append(tag)
        elif tag == 'li':
            if self.list: li = self.list[-1]
            else: li = {'name': 'ul', 'num': 0}
            text.append('\n')
            if li['name'] == 'ul':
                text.append('*' * len(self.list) + ' ')
            elif li['name'] == 'ol':
                text.append('#' * len(self.list) + ' ')
            tags.append('')
        elif tag == 'dl':
            text.append('\n')
            tags.append('')
        elif tag == 'dt':
            text.append('')
            tags.append('\n')
        elif tag == 'dd':
            text.append('    ')
            tags.append('')
        elif tag == 'dd':
            text.append('')
            tags.append('\n')
        elif tag == 'table':
            txt = self.build_block(tag, style, attribs, stylizer)
            txt += '. \n'
            if txt != '\ntable. \n':
                text.append(txt)
            else:
                text.append('\n')
            tags.append('')
        elif tag == 'tr':
            txt = self.build_block('', style, attribs, stylizer)
            txt += '. '
            if txt != '\n. ':
                txt = re.sub ('\n', '', txt)
                text.append(txt)
            tags.append('|\n')
        elif tag == 'td':
            text.append('|')
            txt = ''
            txt += self.check_halign(style)
            txt += self.check_valign(style)
            if attribs.has_key ('colspan'):
                txt += '\\' + attribs['colspan']
            if attribs.has_key ('rowspan'):
                txt += '/' + attribs['rowspan']
            txt += self.check_styles(style)
            if txt != '':
                text.append(txt + '. ')
            tags.append('')
        elif tag == 'th':
            text.append('|_. ')
            tags.append('')
        elif tag == 'span':
            if style['font-variant'] == 'small-caps':
                if self.style_smallcap == False:
                    text.append('&')
                    tags.append('&')
                    self.style_smallcap = True
            else:
                if self.in_a_link == False:
                    txt = '%'
                    if self.opts.keep_links:
                        txt += self.check_id_tag(attribs)
                        txt += self.check_styles(style)
                    if txt != '%':
                        text.append(txt)
                        tags.append('%')

        if self.opts.keep_links and attribs.has_key('id'):
            if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'table'):
                text.append(self.check_id_tag(attribs))

        # Process the styles for any that we want to keep
        if tag not in ('body', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'hr', 'a', 'img', \
                'span', 'table', 'tr', 'td'):
            if not self.in_a_link:
                text.append(self.check_styles(style))

        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text:
            txt = elem.text
            if not self.in_pre:
                txt = self.prepare_string_for_textile(self.remove_newlines(txt))
            text.append(txt)
            self.id_no_text = u''

        # Recurse down into tags within the tag we are in.
        for item in elem:
            text += self.dump_text(item, stylizer)

        # Close all open tags.
        tags.reverse()
        for t in tags:
            if tag in ('pre', 'ul', 'ol', 'li', 'table'):
                if tag == 'pre':
                    self.in_pre = False
                elif tag in ('ul', 'ol'):
                    if self.list: self.list.pop()
                    if not self.list: text.append('\n')
            else:
                if t == 'a':
                    self.in_a_link = False
                    t = ''
                text.append(self.id_no_text)
                self.id_no_text = u''
                if t in ('*]', '*'):
                    self.style_bold = False
                elif t in ('_]', '_'):
                    self.style_italic = False
                elif t == '+]':
                    self.style_under = False
                elif t == '-]':
                    self.style_strike = False
                elif t == '&':
                    self.style_smallcap = False
                if t in ('*]', '_]', '+]', '-]', '*', '_'):
                    txt = self.style_embed.pop()
            text.append('%s' % t)

        # Soft scene breaks.
        if 'margin-bottom' in style.cssdict() and style['margin-bottom'] != 'auto':
            ems = int(round((float(style.marginBottom) / style.fontSize) - 1))
            if ems >= 1:
                text.append(u'\n\n\xa0' * ems)

        # Add the text that is outside of the tag.
        if hasattr(elem, 'tail') and elem.tail:
            tail = elem.tail
            if not self.in_pre:
                tail = self.prepare_string_for_textile(self.remove_newlines(tail))
            text.append(tail)

        return text
src/calibre/ebooks/txt/unsmarten.py (new file, 108 lines)
@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-

"""unsmarten : html2textile helper function"""

__version__ = '0.1'
__author__ = 'Leigh Parry'

import re

def unsmarten(txt):
    txt = re.sub(u'–|&ndash;|&#8211;', r'-', txt)    # en-dash
    txt = re.sub(u'—|&mdash;|&#8212;', r'--', txt)   # em-dash
    txt = re.sub(u'…|&hellip;|&#8230;', r'...', txt) # ellipsis

    txt = re.sub(u'“|”|″|&ldquo;|&rdquo;|&Prime;|&#8220;|&#8221;|&#8243;', r'"', txt) # double quote
    txt = re.sub(u'(["\'‘“]|\s)’', r"\1{'/}", txt)   # apostrophe
    txt = re.sub(u'‘|’|′|&lsquo;|&rsquo;|&prime;|&#8216;|&#8217;|&#8242;', r"'", txt) # single quote

    txt = re.sub(u'¢|&cent;|&#162;', r'{c\}', txt)    # cent
    txt = re.sub(u'£|&pound;|&#163;', r'{L-}', txt)   # pound
    txt = re.sub(u'¥|&yen;|&#165;', r'{Y=}', txt)     # yen
    txt = re.sub(u'©|&copy;|&#169;', r'{(c)}', txt)   # copyright
    txt = re.sub(u'®|&reg;|&#174;', r'{(r)}', txt)    # registered
    txt = re.sub(u'¼|&frac14;|&#188;', r'{1/4}', txt) # quarter
    txt = re.sub(u'½|&frac12;|&#189;', r'{1/2}', txt) # half
    txt = re.sub(u'¾|&frac34;|&#190;', r'{3/4}', txt) # three-quarter
    txt = re.sub(u'À|&Agrave;|&#192;', r'{A`}', txt)  # A-grave
    txt = re.sub(u'Á|&Aacute;|&#193;', r"{A'}", txt)  # A-acute
    txt = re.sub(u'Â|&Acirc;|&#194;', r'{A^}', txt)   # A-circumflex
    txt = re.sub(u'Ã|&Atilde;|&#195;', r'{A~}', txt)  # A-tilde
    txt = re.sub(u'Ä|&Auml;|&#196;', r'{A"}', txt)    # A-umlaut
    txt = re.sub(u'Å|&Aring;|&#197;', r'{Ao}', txt)   # A-ring
    txt = re.sub(u'Æ|&AElig;|&#198;', r'{AE}', txt)   # AE
    txt = re.sub(u'Ç|&Ccedil;|&#199;', r'{C,}', txt)  # C-cedilla
    txt = re.sub(u'È|&Egrave;|&#200;', r'{E`}', txt)  # E-grave
    txt = re.sub(u'É|&Eacute;|&#201;', r"{E'}", txt)  # E-acute
    txt = re.sub(u'Ê|&Ecirc;|&#202;', r'{E^}', txt)   # E-circumflex
    txt = re.sub(u'Ë|&Euml;|&#203;', r'{E"}', txt)    # E-umlaut
    txt = re.sub(u'Ì|&Igrave;|&#204;', r'{I`}', txt)  # I-grave
    txt = re.sub(u'Í|&Iacute;|&#205;', r"{I'}", txt)  # I-acute
    txt = re.sub(u'Î|&Icirc;|&#206;', r'{I^}', txt)   # I-circumflex
    txt = re.sub(u'Ï|&Iuml;|&#207;', r'{I"}', txt)    # I-umlaut
    txt = re.sub(u'Ð|&ETH;|&#208;', r'{D-}', txt)     # ETH
    txt = re.sub(u'Ñ|&Ntilde;|&#209;', r'{N~}', txt)  # N-tilde
    txt = re.sub(u'Ò|&Ograve;|&#210;', r'{O`}', txt)  # O-grave
    txt = re.sub(u'Ó|&Oacute;|&#211;', r"{O'}", txt)  # O-acute
    txt = re.sub(u'Ô|&Ocirc;|&#212;', r'{O^}', txt)   # O-circumflex
    txt = re.sub(u'Õ|&Otilde;|&#213;', r'{O~}', txt)  # O-tilde
    txt = re.sub(u'Ö|&Ouml;|&#214;', r'{O"}', txt)    # O-umlaut
    txt = re.sub(u'×|&times;|&#215;', r'{x}', txt)    # dimension
    txt = re.sub(u'Ø|&Oslash;|&#216;', r'{O/}', txt)  # O-slash
    txt = re.sub(u'Ù|&Ugrave;|&#217;', r"{U`}", txt)  # U-grave
    txt = re.sub(u'Ú|&Uacute;|&#218;', r"{U'}", txt)  # U-acute
    txt = re.sub(u'Û|&Ucirc;|&#219;', r'{U^}', txt)   # U-circumflex
    txt = re.sub(u'Ü|&Uuml;|&#220;', r'{U"}', txt)    # U-umlaut
    txt = re.sub(u'Ý|&Yacute;|&#221;', r"{Y'}", txt)  # Y-acute
    txt = re.sub(u'ß|&szlig;|&#223;', r'{sz}', txt)   # sharp-s
    txt = re.sub(u'à|&agrave;|&#224;', r'{a`}', txt)  # a-grave
    txt = re.sub(u'á|&aacute;|&#225;', r"{a'}", txt)  # a-acute
    txt = re.sub(u'â|&acirc;|&#226;', r'{a^}', txt)   # a-circumflex
    txt = re.sub(u'ã|&atilde;|&#227;', r'{a~}', txt)  # a-tilde
    txt = re.sub(u'ä|&auml;|&#228;', r'{a"}', txt)    # a-umlaut
    txt = re.sub(u'å|&aring;|&#229;', r'{ao}', txt)   # a-ring
    txt = re.sub(u'æ|&aelig;|&#230;', r'{ae}', txt)   # ae
    txt = re.sub(u'ç|&ccedil;|&#231;', r'{c,}', txt)  # c-cedilla
    txt = re.sub(u'è|&egrave;|&#232;', r'{e`}', txt)  # e-grave
    txt = re.sub(u'é|&eacute;|&#233;', r"{e'}", txt)  # e-acute
    txt = re.sub(u'ê|&ecirc;|&#234;', r'{e^}', txt)   # e-circumflex
    txt = re.sub(u'ë|&euml;|&#235;', r'{e"}', txt)    # e-umlaut
    txt = re.sub(u'ì|&igrave;|&#236;', r'{i`}', txt)  # i-grave
    txt = re.sub(u'í|&iacute;|&#237;', r"{i'}", txt)  # i-acute
    txt = re.sub(u'î|&icirc;|&#238;', r'{i^}', txt)   # i-circumflex
    txt = re.sub(u'ï|&iuml;|&#239;', r'{i"}', txt)    # i-umlaut
    txt = re.sub(u'ð|&eth;|&#240;', r'{d-}', txt)     # eth
    txt = re.sub(u'ñ|&ntilde;|&#241;', r'{n~}', txt)  # n-tilde
    txt = re.sub(u'ò|&ograve;|&#242;', r'{o`}', txt)  # o-grave
    txt = re.sub(u'ó|&oacute;|&#243;', r"{o'}", txt)  # o-acute
    txt = re.sub(u'ô|&ocirc;|&#244;', r'{o^}', txt)   # o-circumflex
    txt = re.sub(u'õ|&otilde;|&#245;', r'{o~}', txt)  # o-tilde
    txt = re.sub(u'ö|&ouml;|&#246;', r'{o"}', txt)    # o-umlaut
    txt = re.sub(u'ø|&oslash;|&#248;', r'{o/}', txt)  # o-stroke
    txt = re.sub(u'ù|&ugrave;|&#249;', r'{u`}', txt)  # u-grave
    txt = re.sub(u'ú|&uacute;|&#250;', r"{u'}", txt)  # u-acute
    txt = re.sub(u'û|&ucirc;|&#251;', r'{u^}', txt)   # u-circumflex
    txt = re.sub(u'ü|&uuml;|&#252;', r'{u"}', txt)    # u-umlaut
    txt = re.sub(u'ý|&yacute;|&#253;', r"{y'}", txt)  # y-acute
    txt = re.sub(u'ÿ|&yuml;|&#255;', r'{y"}', txt)    # y-umlaut
    txt = re.sub(u'Œ|&OElig;|&#338;', r'{OE}', txt)   # OE
    txt = re.sub(u'œ|&oelig;|&#339;', r'{oe}', txt)   # oe
    txt = re.sub(u'Š|&Scaron;|&#352;', r'{S^}', txt)  # Scaron
    txt = re.sub(u'š|&scaron;|&#353;', r'{s^}', txt)  # scaron
    txt = re.sub(u'•|&bull;|&#8226;', r'{*}', txt)    # bullet
    txt = re.sub(u'₣|&#8355;', r'{Fr}', txt)          # Franc
    txt = re.sub(u'₤|&#8356;', r'{L=}', txt)          # Lira
    txt = re.sub(u'₨|&#8360;', r'{Rs}', txt)          # Rupee
    txt = re.sub(u'€|&euro;|&#8364;', r'{C=}', txt)   # euro
    txt = re.sub(u'™|&trade;|&#8482;', r'{tm}', txt)  # trademark
    txt = re.sub(u'♠|&spades;|&#9824;', r'{spade}', txt)   # spade
    txt = re.sub(u'♣|&clubs;|&#9827;', r'{club}', txt)     # club
    txt = re.sub(u'♥|&hearts;|&#9829;', r'{heart}', txt)   # heart
    txt = re.sub(u'♦|&diams;|&#9830;', r'{diamond}', txt)  # diamond

    # Move into main code?
    #txt = re.sub(u'\xa0', r'p. ', txt)             # blank paragraph
    #txt = re.sub(u'\n\n\n\n', r'\n\np. \n\n', txt) # blank paragraph
    #txt = re.sub(u'\n \n', r'\n<br />\n', txt)     # blank paragraph - br tag

    return txt
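Note: the entity alternations above were reconstructed (literal character,
named entity, numeric entity); the diff viewer had rendered all three forms
identically. A quick illustration of what unsmarten() produces (input
invented):

    from calibre.ebooks.txt.unsmarten import unsmarten
    print(unsmarten(u'“Smart” text… – with dashes'))
    # -> '"Smart" text... - with dashes'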
@ -620,7 +620,21 @@ class Application(QApplication):
        self.original_font = QFont(QApplication.font())
        fi = gprefs['font']
        if fi is not None:
            QApplication.setFont(QFont(*fi))
            font = QFont(*(fi[:4]))
            s = gprefs.get('font_stretch', None)
            if s is not None:
                font.setStretch(s)
            QApplication.setFont(font)
        st = self.style()
        if st is not None:
            st = unicode(st.objectName()).lower()
        if (islinux or isfreebsd) and st in ('windows', 'motif', 'cde'):
            from PyQt4.Qt import QStyleFactory
            styles = set(map(unicode, QStyleFactory.keys()))
            if 'Cleanlooks' in styles:
                self.setStyle('Cleanlooks')
            else:
                self.setStyle('Plastique')

    def _send_file_open_events(self):
        with self._file_open_lock:
@ -20,6 +20,9 @@ class GenerateCatalogAction(InterfaceAction):
    action_spec = (_('Create a catalog of the books in your calibre library'), 'catalog.png', 'Catalog builder', None)
    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])

    def genesis(self):
        self.qaction.triggered.connect(self.generate_catalog)

    def generate_catalog(self):
        rows = self.gui.library_view.selectionModel().selectedRows()
        if not rows or len(rows) < 2:
@ -246,7 +246,8 @@ class ChooseLibraryAction(InterfaceAction):
    def delete_requested(self, name, location):
        loc = location.replace('/', os.sep)
        if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
                _('<b style="color: red">All files</b> from <br><br><b>%s</b><br><br> will be '
                _('<b style="color: red">All files</b> (not just ebooks) '
                    'from <br><br><b>%s</b><br><br> will be '
                    '<b>permanently deleted</b>. Are you sure?') % loc,
                show_copy_button=False):
            return
@ -478,6 +478,10 @@ class EditMetadataAction(InterfaceAction):
            try:
                set_title = not mi.is_null('title')
                set_authors = not mi.is_null('authors')
                idents = db.get_identifiers(i, index_is_id=True)
                if mi.identifiers:
                    idents.update(mi.identifiers)
                mi.identifiers = idents
                db.set_metadata(i, mi, commit=False, set_title=set_title,
                        set_authors=set_authors, notify=False)
                self.applied_ids.append(i)
@ -10,6 +10,7 @@ from functools import partial

from PyQt4.Qt import QMenu

from calibre.gui2 import error_dialog
from calibre.gui2.actions import InterfaceAction
from calibre.gui2.dialogs.confirm_delete import confirm

@ -19,24 +20,86 @@ class StoreAction(InterfaceAction):
    action_spec = (_('Get books'), 'store.png', None, None)

    def genesis(self):
        self.qaction.triggered.connect(self.search)
        self.qaction.triggered.connect(self.do_search)
        self.store_menu = QMenu()
        self.load_menu()

    def load_menu(self):
        self.store_menu.clear()
        self.store_menu.addAction(_('Search'), self.search)
        self.store_menu.addAction(_('Search for ebooks'), self.search)
        self.store_menu.addAction(_('Search for this author'), self.search_author)
        self.store_menu.addAction(_('Search for this title'), self.search_title)
        self.store_menu.addAction(_('Search for this book'), self.search_author_title)
        self.store_menu.addSeparator()
        for n, p in self.gui.istores.items():
            self.store_menu.addAction(n, partial(self.open_store, p))
        self.store_list_menu = self.store_menu.addMenu(_('Stores'))
        for n, p in sorted(self.gui.istores.items(), key=lambda x: x[0].lower()):
            self.store_list_menu.addAction(n, partial(self.open_store, p))
        self.qaction.setMenu(self.store_menu)

    def search(self):
    def do_search(self):
        return self.search()

    def search(self, query=''):
        self.show_disclaimer()
        from calibre.gui2.store.search.search import SearchDialog
        sd = SearchDialog(self.gui.istores, self.gui)
        sd = SearchDialog(self.gui.istores, self.gui, query)
        sd.exec_()

    def _get_selected_row(self):
        rows = self.gui.current_view().selectionModel().selectedRows()
        if not rows or len(rows) == 0:
            return None
        return rows[0].row()

    def _get_author(self, row):
        author = ''
        if self.gui.current_view() is self.gui.library_view:
            author = self.gui.library_view.model().authors(row)
            if author:
                author = author.replace('|', ' ')
        else:
            mi = self.gui.current_view().model().get_book_display_info(row)
            author = ' & '.join(mi.authors)

        return author

    def search_author(self):
        row = self._get_selected_row()
        if row == None:
            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
            return

        query = 'author:"%s"' % self._get_author(row)
        self.search(query)

    def _get_title(self, row):
        title = ''
        if self.gui.current_view() is self.gui.library_view:
            title = self.gui.library_view.model().title(row)
        else:
            mi = self.gui.current_view().model().get_book_display_info(row)
            title = mi.title

        return title

    def search_title(self):
        row = self._get_selected_row()
        if row == None:
            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
            return

        query = 'title:"%s"' % self._get_title(row)
        self.search(query)

    def search_author_title(self):
        row = self._get_selected_row()
        if row == None:
            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
            return

        query = 'author:"%s" title:"%s"' % (self._get_author(row), self._get_title(row))
        self.search(query)

    def open_store(self, store_plugin):
        self.show_disclaimer()
        store_plugin.open(self.gui)
@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
        Widget.__init__(self, parent,
                ['newline', 'max_line_length', 'force_max_line_length',
                'inline_toc', 'txt_output_formatting', 'keep_links', 'keep_image_references',
                'txt_output_encoding'])
                'keep_color', 'txt_output_encoding'])
        self.db, self.book_id = db, book_id
        for x in get_option('newline').option.choices:
            self.opt_newline.addItem(x)

@ -122,6 +122,13 @@
     </property>
    </widget>
   </item>
   <item>
    <widget class="QCheckBox" name="opt_keep_color">
     <property name="text">
      <string>Keep text color, when possible</string>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
</item>

@ -439,6 +439,7 @@ def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, parent=None):
        w = widget_factory(dt, col)
        ans.append(w)
        for c in range(0, len(w.widgets), 2):
            w.widgets[c].setWordWrap(True)
            w.widgets[c].setBuddy(w.widgets[c+1])
            layout.addWidget(w.widgets[c], row, column)
            layout.addWidget(w.widgets[c+1], row, column+1)

@ -3,12 +3,13 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__license__ = 'GPL v3'

from PyQt4.Qt import Qt, QDialog, QTableWidgetItem, QAbstractItemView
from PyQt4.Qt import (Qt, QDialog, QTableWidgetItem, QAbstractItemView, QIcon,
        QDialogButtonBox, QFrame, QLabel, QTimer, QMenu, QApplication)

from calibre.ebooks.metadata import author_to_author_sort
from calibre.gui2 import error_dialog
from calibre.gui2.dialogs.edit_authors_dialog_ui import Ui_EditAuthorsDialog
from calibre.utils.icu import sort_key, strcmp
from calibre.utils.icu import sort_key

class tableItem(QTableWidgetItem):
    def __ge__(self, other):

@ -19,7 +20,7 @@ class tableItem(QTableWidgetItem):

class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):

    def __init__(self, parent, db, id_to_select):
    def __init__(self, parent, db, id_to_select, select_sort):
        QDialog.__init__(self, parent)
        Ui_EditAuthorsDialog.__init__(self)
        self.setupUi(self)

@ -30,14 +31,23 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):

        self.buttonBox.accepted.connect(self.accepted)

        # Set up the column headings
        self.table.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table.setColumnCount(2)
        self.table.setHorizontalHeaderLabels([_('Author'), _('Author sort')])
        self.down_arrow_icon = QIcon(I('arrow-down.png'))
        self.up_arrow_icon = QIcon(I('arrow-up.png'))
        self.blank_icon = QIcon(I('blank.png'))
        self.auth_col = QTableWidgetItem(_('Author'))
        self.table.setHorizontalHeaderItem(0, self.auth_col)
        self.auth_col.setIcon(self.blank_icon)
        self.aus_col = QTableWidgetItem(_('Author sort'))
        self.table.setHorizontalHeaderItem(1, self.aus_col)
        self.aus_col.setIcon(self.up_arrow_icon)

        # Add the data
        self.authors = {}
        auts = db.get_authors_with_ids()
        self.table.setRowCount(len(auts))
        setattr(self.table, '__lt__', lambda x, y: True if strcmp(x, y) < 0 else False)
        select_item = None
        for row, (id, author, sort) in enumerate(auts):
            author = author.replace('|', ',')

@ -48,7 +58,10 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
            self.table.setItem(row, 0, aut)
            self.table.setItem(row, 1, sort)
            if id == id_to_select:
                select_item = sort
                if select_sort:
                    select_item = sort
                else:
                    select_item = aut
        self.table.resizeColumnsToContents()

        # set up the cellChanged signal only after the table is filled

@ -69,23 +82,153 @@ class EditAuthorsDialog(QDialog, Ui_EditAuthorsDialog):
        self.recalc_author_sort.clicked.connect(self.do_recalc_author_sort)
        self.auth_sort_to_author.clicked.connect(self.do_auth_sort_to_author)

        # Position on the desired item
        if select_item is not None:
            self.table.setCurrentItem(select_item)
            self.table.editItem(select_item)
            self.start_find_pos = select_item.row() * 2 + select_item.column()
        else:
            self.table.setCurrentCell(0, 0)
            self.start_find_pos = -1

        # set up the search box
        self.find_box.initialize('manage_authors_search')
        self.find_box.lineEdit().returnPressed.connect(self.do_find)
        self.find_box.editTextChanged.connect(self.find_text_changed)
        self.find_button.clicked.connect(self.do_find)

        l = QLabel(self.table)
        self.not_found_label = l
        l.setFrameStyle(QFrame.StyledPanel)
        l.setAutoFillBackground(True)
        l.setText(_('No matches found'))
        l.setAlignment(Qt.AlignVCenter)
        l.resize(l.sizeHint())
        l.move(10,20)
        l.setVisible(False)
        self.not_found_label.move(40, 40)
        self.not_found_label_timer = QTimer()
        self.not_found_label_timer.setSingleShot(True)
        self.not_found_label_timer.timeout.connect(
                self.not_found_label_timer_event, type=Qt.QueuedConnection)

        self.table.setContextMenuPolicy(Qt.CustomContextMenu)
        self.table.customContextMenuRequested.connect(self.show_context_menu)

    def show_context_menu(self, point):
        self.context_item = self.table.itemAt(point)
        case_menu = QMenu(_('Change Case'))
        action_upper_case = case_menu.addAction(_('Upper Case'))
        action_lower_case = case_menu.addAction(_('Lower Case'))
        action_swap_case = case_menu.addAction(_('Swap Case'))
        action_title_case = case_menu.addAction(_('Title Case'))
        action_capitalize = case_menu.addAction(_('Capitalize'))

        action_upper_case.triggered.connect(self.upper_case)
        action_lower_case.triggered.connect(self.lower_case)
        action_swap_case.triggered.connect(self.swap_case)
        action_title_case.triggered.connect(self.title_case)
        action_capitalize.triggered.connect(self.capitalize)

        m = self.au_context_menu = QMenu()
        ca = m.addAction(_('Copy'))
        ca.triggered.connect(self.copy_to_clipboard)
        ca = m.addAction(_('Paste'))
        ca.triggered.connect(self.paste_from_clipboard)
        m.addSeparator()

        if self.context_item.column() == 0:
            ca = m.addAction(_('Copy to author sort'))
            ca.triggered.connect(self.copy_au_to_aus)
        else:
            ca = m.addAction(_('Copy to author'))
            ca.triggered.connect(self.copy_aus_to_au)
        m.addSeparator()
        m.addMenu(case_menu)
        m.exec_(self.table.mapToGlobal(point))

    def copy_to_clipboard(self):
        cb = QApplication.clipboard()
        cb.setText(unicode(self.context_item.text()))

    def paste_from_clipboard(self):
        cb = QApplication.clipboard()
        self.context_item.setText(cb.text())

    def upper_case(self):
        self.context_item.setText(icu_upper(unicode(self.context_item.text())))

    def lower_case(self):
        self.context_item.setText(icu_lower(unicode(self.context_item.text())))

    def swap_case(self):
        self.context_item.setText(unicode(self.context_item.text()).swapcase())

    def title_case(self):
        from calibre.utils.titlecase import titlecase
        self.context_item.setText(titlecase(unicode(self.context_item.text())))

    def capitalize(self):
        from calibre.utils.icu import capitalize
        self.context_item.setText(capitalize(unicode(self.context_item.text())))

    def copy_aus_to_au(self):
        row = self.context_item.row()
        dest = self.table.item(row, 0)
        dest.setText(self.context_item.text())

    def copy_au_to_aus(self):
        row = self.context_item.row()
        dest = self.table.item(row, 1)
        dest.setText(self.context_item.text())

    def not_found_label_timer_event(self):
        self.not_found_label.setVisible(False)

    def find_text_changed(self):
        self.start_find_pos = -1

    def do_find(self):
        self.not_found_label.setVisible(False)
        # For some reason the button box keeps stealing the RETURN shortcut.
        # Steal it back
        self.buttonBox.button(QDialogButtonBox.Ok).setDefault(False)
        self.buttonBox.button(QDialogButtonBox.Ok).setAutoDefault(False)
        self.buttonBox.button(QDialogButtonBox.Cancel).setDefault(False)
        self.buttonBox.button(QDialogButtonBox.Cancel).setAutoDefault(False)
        st = icu_lower(unicode(self.find_box.currentText()))

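        # Cells are numbered row-major: even positions map to the author
        # column (0), odd positions to the author sort column (1), so this
        # walks every cell once, wrapping around from the starting point.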
        for i in range(0, self.table.rowCount()*2):
            self.start_find_pos = (self.start_find_pos + 1) % (self.table.rowCount()*2)
            r = (self.start_find_pos/2)%self.table.rowCount()
            c = self.start_find_pos % 2
            item = self.table.item(r, c)
            text = icu_lower(unicode(item.text()))
            if st in text:
                self.table.setCurrentItem(item)
                self.table.setFocus(True)
                return
        # Nothing found. Pop up the little dialog for 1.5 seconds
        self.not_found_label.setVisible(True)
        self.not_found_label_timer.start(1500)

    def do_sort_by_author(self):
        self.author_order = 1 if self.author_order == 0 else 0
        self.table.sortByColumn(0, self.author_order)
        self.sort_by_author.setChecked(True)
        self.sort_by_author_sort.setChecked(False)
        self.auth_col.setIcon(self.down_arrow_icon if self.author_order
                else self.up_arrow_icon)
        self.aus_col.setIcon(self.blank_icon)

    def do_sort_by_author_sort(self):
        self.author_sort_order = 1 if self.author_sort_order == 0 else 0
        self.table.sortByColumn(1, self.author_sort_order)
        self.sort_by_author.setChecked(False)
        self.sort_by_author_sort.setChecked(True)
        self.aus_col.setIcon(self.down_arrow_icon if self.author_sort_order
                else self.up_arrow_icon)
        self.auth_col.setIcon(self.blank_icon)

    def accepted(self):
        self.result = []

@ -20,6 +20,50 @@
   <string>Manage authors</string>
  </property>
  <layout class="QVBoxLayout" name="verticalLayout">
   <item>
    <layout class="QHBoxLayout" name="">
     <item>
      <widget class="QLabel">
       <property name="text">
        <string>&amp;Search for:</string>
       </property>
       <property name="buddy">
        <cstring>find_box</cstring>
       </property>
      </widget>
     </item>
     <item>
      <widget class="HistoryLineEdit" name="find_box">
       <property name="minimumSize">
        <size>
         <width>200</width>
         <height>0</height>
        </size>
       </property>
      </widget>
     </item>
     <item>
      <widget class="QPushButton" name="find_button">
       <property name="text">
        <string>F&amp;ind</string>
       </property>
      </widget>
     </item>
     <item>
      <spacer>
       <property name="orientation">
        <enum>Qt::Horizontal</enum>
       </property>
       <property name="sizeHint" stdset="0">
        <size>
         <width>40</width>
         <height>20</height>
        </size>
       </property>
      </spacer>
     </item>
    </layout>
   </item>
   <item>
    <widget class="QTableWidget" name="table">
     <property name="sizePolicy">

@ -143,4 +187,11 @@ after changing Preferences->Advanced->Tweaks->Author sort name algorithm
  </hints>
 </connection>
</connections>
<customwidgets>
 <customwidget>
  <class>HistoryLineEdit</class>
  <extends>QComboBox</extends>
  <header>calibre/gui2/widgets.h</header>
 </customwidget>
</customwidgets>
</ui>

@ -19,17 +19,23 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
    INFO = 2
    QUESTION = 3

    def __init__(self, type_, title, msg, det_msg='', show_copy_button=True,
                 parent=None):
    def __init__(self, type_, title, msg,
                 det_msg='',
                 q_icon=None,
                 show_copy_button=True,
                 parent=None):
        QDialog.__init__(self, parent)
        icon = {
                self.ERROR : 'error',
                self.WARNING: 'warning',
                self.INFO: 'information',
                self.QUESTION: 'question',
        }[type_]
        icon = 'dialog_%s.png'%icon
        self.icon = QIcon(I(icon))
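        # Allow callers to supply a ready-made QIcon; otherwise fall back
        # to the stock per-type dialog icon.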
        if q_icon is None:
            icon = {
                    self.ERROR : 'error',
                    self.WARNING: 'warning',
                    self.INFO: 'information',
                    self.QUESTION: 'question',
            }[type_]
            icon = 'dialog_%s.png'%icon
            self.icon = QIcon(I(icon))
        else:
            self.icon = q_icon
        self.setupUi(self)

        self.setWindowTitle(title)

@ -44,7 +50,6 @@ class MessageBox(QDialog, Ui_Dialog): # {{{
                self.bb.ActionRole)
        self.ctc_button.clicked.connect(self.copy_to_clipboard)


        self.show_det_msg = _('Show &details')
        self.hide_det_msg = _('Hide &details')
        self.det_msg_toggle = self.bb.addButton(self.show_det_msg, self.bb.ActionRole)

@ -506,6 +506,9 @@ class BooksModel(QAbstractTableModel): # {{{
    def id(self, row):
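        # 'row' may be an int or an object with a row() method (such as a
        # QModelIndex); getattr picks whichever applies.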
        return self.db.id(getattr(row, 'row', lambda:row)())

    def authors(self, row_number):
        return self.db.authors(row_number)

    def title(self, row_number):
        return self.db.title(row_number)

@ -439,10 +439,16 @@ class BooksView(QTableView): # {{{

        if tweaks['sort_columns_at_startup'] is not None:
            sh = []
            for c,d in tweaks['sort_columns_at_startup']:
                if not isinstance(d, bool):
                    d = True if d == 0 else False
                sh.append((c, d))
            try:
                for c,d in tweaks['sort_columns_at_startup']:
                    if not isinstance(d, bool):
                        d = True if d == 0 else False
                    sh.append((c, d))
            except:
                # Ignore invalid tweak values as users seem to often get them
                # wrong
                import traceback
                traceback.print_exc()
            old_state['sort_history'] = sh

        self.apply_state(old_state)

@ -299,13 +299,13 @@ def run_gui(opts, args, actions, listener, app, gui_debug=None):
        if getattr(runner.main, 'debug_on_restart', False):
            run_in_debug_mode()
        else:
            import subprocess
            print 'Restarting with:', e, sys.argv
            if hasattr(sys, 'frameworks_dir'):
                app = os.path.dirname(os.path.dirname(sys.frameworks_dir))
                import subprocess
                subprocess.Popen('sleep 3s; open '+app, shell=True)
            else:
                os.execvp(e, sys.argv)
                subprocess.Popen([e] + sys.argv[1:])
    else:
        if iswindows:
            try:

@ -9,8 +9,8 @@ __docformat__ = 'restructuredtext en'

import textwrap, re, os

from PyQt4.Qt import (Qt, QDateEdit, QDate, pyqtSignal,
        QIcon, QToolButton, QWidget, QLabel, QGridLayout,
from PyQt4.Qt import (Qt, QDateEdit, QDate, pyqtSignal, QMessageBox,
        QIcon, QToolButton, QWidget, QLabel, QGridLayout, QApplication,
        QDoubleSpinBox, QListWidgetItem, QSize, QPixmap,
        QPushButton, QSpinBox, QLineEdit, QSizePolicy)

@ -19,10 +19,10 @@ from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
from calibre.utils.icu import sort_key
from calibre.utils.config import tweaks, prefs
from calibre.ebooks.metadata import (title_sort, authors_to_string,
        string_to_authors, check_isbn)
        string_to_authors, check_isbn, authors_to_sort_string)
from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2 import (file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE,
        choose_files, error_dialog, choose_images, question_dialog)
        choose_files, error_dialog, choose_images)
from calibre.utils.date import local_tz, qt_to_dt
from calibre import strftime
from calibre.ebooks import BOOK_EXTENSIONS

@ -31,6 +31,16 @@ from calibre.utils.date import utcfromtimestamp
from calibre.gui2.comments_editor import Editor
from calibre.library.comments import comments_to_html
from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.utils.icu import strcmp

def save_dialog(parent, title, msg, det_msg=''):
    d = QMessageBox(parent)
    d.setWindowTitle(title)
    d.setText(msg)
    d.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel)
    return d.exec_()


'''
The interface common to all widgets used to set basic metadata

@ -156,7 +166,7 @@ class AuthorsEdit(MultiCompleteComboBox):
    TOOLTIP = ''
    LABEL = _('&Author(s):')

    def __init__(self, parent):
    def __init__(self, parent, manage_authors):
        self.dialog = parent
        self.books_to_refresh = set([])
        MultiCompleteComboBox.__init__(self, parent)

@ -164,6 +174,28 @@ class AuthorsEdit(MultiCompleteComboBox):
        self.setWhatsThis(self.TOOLTIP)
        self.setEditable(True)
        self.setSizeAdjustPolicy(self.AdjustToMinimumContentsLengthWithIcon)
        manage_authors.triggered.connect(self.manage_authors)

    def manage_authors(self):
        if self.original_val != self.current_val:
            d = save_dialog(self, _('Authors changed'),
                    _('You have changed the authors for this book. You must save '
                      'these changes before you can use Manage authors. Do you '
                      'want to save these changes?'))
            if d == QMessageBox.Cancel:
                return
            if d == QMessageBox.Yes:
                self.commit(self.db, self.id_)
                self.db.commit()
                self.original_val = self.current_val
            else:
                self.current_val = self.original_val
        first_author = self.current_val[0] if len(self.current_val) else None
        first_author_id = self.db.get_author_id(first_author) if first_author else None
        self.dialog.parent().do_author_sort_edit(self, first_author_id,
                select_sort=False)
        self.initialize(self.db, self.id_)
        self.dialog.author_sort.initialize(self.db, self.id_)

    def get_default(self):
        return _('Unknown')

@ -175,8 +207,8 @@ class AuthorsEdit(MultiCompleteComboBox):
        self.clear()
        for i in all_authors:
            id, name = i
            name = [name.strip().replace('|', ',') for n in name.split(',')]
            self.addItem(authors_to_string(name))
            name = name.strip().replace('|', ',')
            self.addItem(name)

        self.set_separator('&')
        self.set_space_before_sep(True)

@ -188,6 +220,8 @@ class AuthorsEdit(MultiCompleteComboBox):
            au = _('Unknown')
        self.current_val = [a.strip().replace('|', ',') for a in au.split(',')]
        self.original_val = self.current_val
        self.id_ = id_
        self.db = db

    def commit(self, db, id_):
        authors = self.current_val

@ -238,7 +272,7 @@ class AuthorSortEdit(EnLineEdit):
                'No action is required if this is what you want.'))
        self.tooltips = (ok_tooltip, bad_tooltip)

        self.authors_edit.editTextChanged.connect(self.update_state)
        self.authors_edit.editTextChanged.connect(self.update_state_and_val)
        self.textChanged.connect(self.update_state)

        autogen_button.clicked.connect(self.auto_generate)

@ -260,12 +294,19 @@ class AuthorSortEdit(EnLineEdit):

        return property(fget=fget, fset=fset)

    def update_state_and_val(self):
        # Handle case change if the authors box changed
        aus = authors_to_sort_string(self.authors_edit.current_val)
        if strcmp(aus, self.current_val) == 0:
            self.current_val = aus
        self.update_state()

    def update_state(self, *args):
        au = unicode(self.authors_edit.text())
        au = re.sub(r'\s+et al\.$', '', au)
        au = self.db.author_sort_from_authors(string_to_authors(au))

        normal = au == self.current_val
        normal = strcmp(au, self.current_val) == 0
        if normal:
            col = 'rgb(0, 255, 0, 20%)'
        else:

@ -900,10 +941,13 @@ class TagsEdit(MultiCompleteLineEdit): # {{{

    def edit(self, db, id_):
        if self.changed:
            if question_dialog(self, _('Tags changed'),
            d = save_dialog(self, _('Tags changed'),
                    _('You have changed the tags. In order to use the tags'
                      ' editor, you must either discard or apply these '
                      'changes. Apply changes?'), show_copy_button=False):
                      'changes. Apply changes?'))
            if d == QMessageBox.Cancel:
                return
            if d == QMessageBox.Yes:
                self.commit(db, id_)
                db.commit()
                self.original_val = self.current_val

@ -993,6 +1037,13 @@ class IdentifiersEdit(QLineEdit): # {{{
        self.setToolTip(tt+extra)
        self.setStyleSheet('QLineEdit { background-color: %s }'%col)

    def paste_isbn(self):
        text = unicode(QApplication.clipboard().text()).strip()
        if text:
            vals = self.current_val
            vals['isbn'] = text
            self.current_val = vals

    # }}}

class PublisherEdit(MultiCompleteComboBox): # {{{

@ -1075,7 +1126,7 @@ class DateEdit(QDateEdit): # {{{
    @dynamic_property
    def current_val(self):
        def fget(self):
            return qt_to_dt(self.date())
            return qt_to_dt(self.date(), as_utc=False)
        def fset(self, val):
            if val is None:
                val = UNDEFINED_DATE

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import textwrap

from PyQt4.Qt import (QWidget, QGridLayout, QGroupBox, QListView, Qt, QSpinBox,
        QDoubleSpinBox, QCheckBox, QLineEdit, QComboBox, QLabel)
        QDoubleSpinBox, QCheckBox, QLineEdit, QComboBox, QLabel, QVariant)

from calibre.gui2.preferences.metadata_sources import FieldsModel as FM

@ -95,9 +95,9 @@ class ConfigWidget(QWidget):
            widget.setChecked(bool(val))
        elif opt.type == 'choices':
            widget = QComboBox(self)
            for x in opt.choices:
                widget.addItem(x)
            idx = opt.choices.index(val)
            for key, label in opt.choices.iteritems():
                widget.addItem(label, QVariant(key))
            idx = widget.findData(QVariant(val))
            widget.setCurrentIndex(idx)
            widget.opt = opt
            widget.setToolTip(textwrap.fill(opt.desc))

@ -124,7 +124,8 @@ class ConfigWidget(QWidget):
        elif isinstance(w, QCheckBox):
            val = w.isChecked()
        elif isinstance(w, QComboBox):
            val = unicode(w.currentText())
            idx = w.currentIndex()
            val = unicode(w.itemData(idx).toString())
        self.plugin.prefs[w.opt.name] = val

@ -31,6 +31,7 @@ class MetadataSingleDialogBase(ResizableDialog):
    view_format = pyqtSignal(object, object)
    cc_two_column = tweaks['metadata_single_use_2_cols_for_custom_fields']
    one_line_comments_toolbar = False
    use_toolbutton_for_config_metadata = True

    def __init__(self, db, parent=None):
        self.db = db

@ -69,7 +70,11 @@ class MetadataSingleDialogBase(ResizableDialog):
        self.setLayout(self.l)
        self.l.setMargin(0)
        self.l.addWidget(self.scroll_area)
        self.l.addWidget(self.button_box)
        ll = self.button_box_layout = QHBoxLayout()
        self.l.addLayout(ll)
        ll.addSpacing(10)
        ll.addWidget(self.button_box)
        ll.addSpacing(10)

        self.setWindowIcon(QIcon(I('edit_input.png')))
        self.setWindowTitle(_('Edit Metadata'))

@ -103,16 +108,18 @@ class MetadataSingleDialogBase(ResizableDialog):
        self.basic_metadata_widgets.extend([self.title, self.title_sort])

        self.deduce_author_sort_button = b = QToolButton(self)
        b.setToolTip(_(
            'Automatically create the author sort entry based on the current'
            ' author entry.\n'
            'Using this button to create author sort will change author sort from'
            ' red to green.'))
        b.setToolTip('<p>' +
            _('Automatically create the author sort entry based on the current '
              'author entry. Using this button to create author sort will '
              'change author sort from red to green. There is a menu of '
              'functions available under this button. Click and hold '
              'on the button to see it.') + '</p>')
        b.m = m = QMenu()
        ac = m.addAction(QIcon(I('forward.png')), _('Set author sort from author'))
        ac2 = m.addAction(QIcon(I('back.png')), _('Set author from author sort'))
        ac3 = m.addAction(QIcon(I('user_profile.png')), _('Manage authors'))
        b.setMenu(m)
        self.authors = AuthorsEdit(self)
        self.authors = AuthorsEdit(self, ac3)
        self.author_sort = AuthorSortEdit(self, self.authors, b, self.db, ac,
                ac2)
        self.basic_metadata_widgets.extend([self.authors, self.author_sort])

@ -123,6 +130,13 @@ class MetadataSingleDialogBase(ResizableDialog):
                'Swap the author and title'))
        self.swap_title_author_button.clicked.connect(self.swap_title_author)

        self.manage_authors_button = QToolButton(self)
        self.manage_authors_button.setIcon(QIcon(I('user_profile.png')))
        self.manage_authors_button.setToolTip('<p>' + _(
            'Manage authors. Use to rename authors and correct '
            'individual author\'s sort values') + '</p>')
        self.manage_authors_button.clicked.connect(self.authors.manage_authors)

        self.series = SeriesEdit(self)
        self.remove_unused_series_button = QToolButton(self)
        self.remove_unused_series_button.setToolTip(

@ -159,6 +173,12 @@ class MetadataSingleDialogBase(ResizableDialog):
        self.clear_identifiers_button = QToolButton(self)
        self.clear_identifiers_button.setIcon(QIcon(I('trash.png')))
        self.clear_identifiers_button.clicked.connect(self.identifiers.clear)
        self.paste_isbn_button = QToolButton(self)
        self.paste_isbn_button.setToolTip('<p>' +
            _('Paste the contents of the clipboard into the '
              'identifiers box prefixed with isbn:') + '</p>')
        self.paste_isbn_button.setIcon(QIcon(I('edit-paste.png')))
        self.paste_isbn_button.clicked.connect(self.identifiers.paste_isbn)

        self.publisher = PublisherEdit(self)
        self.basic_metadata_widgets.append(self.publisher)

@ -174,7 +194,12 @@ class MetadataSingleDialogBase(ResizableDialog):
        font.setBold(True)
        self.fetch_metadata_button.setFont(font)

        self.config_metadata_button = QToolButton(self)
        if self.use_toolbutton_for_config_metadata:
            self.config_metadata_button = QToolButton(self)
            self.config_metadata_button.setIcon(QIcon(I('config.png')))
        else:
            self.config_metadata_button = QPushButton(self)
            self.config_metadata_button.setText(_('Configure download metadata'))
        self.config_metadata_button.setIcon(QIcon(I('config.png')))
        self.config_metadata_button.clicked.connect(self.configure_metadata)
        self.config_metadata_button.setToolTip(

@ -290,13 +315,17 @@ class MetadataSingleDialogBase(ResizableDialog):
                    show=True)
            return

    def update_from_mi(self, mi):
    def update_from_mi(self, mi, update_sorts=True):
        if not mi.is_null('title'):
            self.title.current_val = mi.title
            if update_sorts:
                self.title_sort.auto_generate()
        if not mi.is_null('authors'):
            self.authors.current_val = mi.authors
        if not mi.is_null('author_sort'):
            self.author_sort.current_val = mi.author_sort
        elif update_sorts:
            self.author_sort.auto_generate()
        if not mi.is_null('rating'):
            try:
                self.rating.current_val = mi.rating

@ -307,7 +336,9 @@ class MetadataSingleDialogBase(ResizableDialog):
        if not mi.is_null('tags'):
            self.tags.current_val = mi.tags
        if not mi.is_null('identifiers'):
            self.identifiers.current_val = mi.identifiers
            current = self.identifiers.current_val
            current.update(mi.identifiers)
            self.identifiers.current_val = current
        if not mi.is_null('pubdate'):
            self.pubdate.current_val = mi.pubdate
        if not mi.is_null('series') and mi.series.strip():

@ -493,7 +524,8 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
            sto(one, two)
            sto(two, three)

        tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
        tl.addWidget(self.swap_title_author_button, 0, 0, 1, 1)
        tl.addWidget(self.manage_authors_button, 1, 0, 1, 1)

        create_row(0, self.title, self.deduce_title_sort_button, self.title_sort)
        sto(self.title_sort, self.authors)

@ -502,6 +534,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        create_row(2, self.series, self.remove_unused_series_button,
                   self.series_index, icon='trash.png')
        sto(self.series_index, self.swap_title_author_button)
        sto(self.swap_title_author_button, self.manage_authors_button)

        tl.addWidget(self.formats_manager, 0, 6, 3, 1)

@ -512,7 +545,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self)
        gb.l = l = QGridLayout()
        gb.setLayout(l)
        sto(self.swap_title_author_button, self.cover.buttons[0])
        sto(self.manage_authors_button, self.cover.buttons[0])
        for i, b in enumerate(self.cover.buttons[:3]):
            l.addWidget(b, 0, i, 1, 1)
            sto(b, self.cover.buttons[i+1])

@ -526,10 +559,16 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        w.setLayout(w.l)
        l.setMargin(0)
        self.splitter.addWidget(w)
        def create_row2(row, widget, button=None):
        def create_row2(row, widget, button=None, front_button=None):
            row += 1
            ql = BuddyLabel(widget)
            l.addWidget(ql, row, 0, 1, 1)
            if front_button:
                ltl = QHBoxLayout()
                ltl.addWidget(front_button)
                ltl.addWidget(ql)
                l.addLayout(ltl, row, 0, 1, 1)
            else:
                l.addWidget(ql, row, 0, 1, 1)
            l.addWidget(widget, row, 1, 1, 2 if button is None else 1)
            if button is not None:
                l.addWidget(button, row, 2, 1, 1)

@ -544,8 +583,10 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        create_row2(1, self.rating)
        sto(self.rating, self.tags)
        create_row2(2, self.tags, self.tags_editor_button)
        sto(self.tags_editor_button, self.identifiers)
        create_row2(3, self.identifiers, self.clear_identifiers_button)
        sto(self.tags_editor_button, self.paste_isbn_button)
        sto(self.paste_isbn_button, self.identifiers)
        create_row2(3, self.identifiers, self.clear_identifiers_button,
                    front_button=self.paste_isbn_button)
        sto(self.clear_identifiers_button, self.timestamp)
        create_row2(4, self.timestamp, self.timestamp.clear_button)
        sto(self.timestamp.clear_button, self.pubdate)

@ -583,6 +624,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{

    cc_two_column = False
    one_line_comments_toolbar = True
    use_toolbutton_for_config_metadata = False

    on_drag_enter = pyqtSignal()

@ -618,13 +660,11 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
        self.tabs[0].l.addWidget(gb, 0, 0, 1, 1)
        gb.setLayout(tl)

        self.button_box.addButton(self.fetch_metadata_button,
                QDialogButtonBox.ActionRole)
        self.config_metadata_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
        self.config_metadata_button.setText(_('Configure metadata downloading'))
        self.button_box.addButton(self.config_metadata_button,
                QDialogButtonBox.ActionRole)
        sto(self.button_box, self.title)
        self.button_box_layout.insertWidget(1, self.fetch_metadata_button)
        self.button_box_layout.insertWidget(2, self.config_metadata_button)
        sto(self.button_box, self.fetch_metadata_button)
        sto(self.fetch_metadata_button, self.config_metadata_button)
        sto(self.config_metadata_button, self.title)

        def create_row(row, widget, tab_to, button=None, icon=None, span=1):
            ql = BuddyLabel(widget)

@ -642,6 +682,8 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
            sto(widget, tab_to)

        tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
        tl.addWidget(self.manage_authors_button, 2, 0, 1, 1)
        tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)

        create_row(0, self.title, self.title_sort,
                   button=self.deduce_title_sort_button, span=2,

@ -663,6 +705,9 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
                   button=self.timestamp.clear_button, icon='trash.png')
        create_row(11, self.identifiers, self.comments,
                   button=self.clear_identifiers_button, icon='trash.png')
        sto(self.clear_identifiers_button, self.swap_title_author_button)
        sto(self.swap_title_author_button, self.manage_authors_button)
        sto(self.manage_authors_button, self.paste_isbn_button)
        tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
                   12, 1, 1 ,1)

@ -702,7 +747,6 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
        gb = QGroupBox(_('Change cover'), tab1)
        l = QGridLayout()
        gb.setLayout(l)
        sto(self.swap_title_author_button, self.cover.buttons[0])
        for i, b in enumerate(self.cover.buttons[:3]):
            l.addWidget(b, 0, i, 1, 1)
            sto(b, self.cover.buttons[i+1])

@ -732,6 +776,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{

    cc_two_column = False
    one_line_comments_toolbar = True
    use_toolbutton_for_config_metadata = False

    def do_layout(self):
        self.central_widget.clear()

@ -750,13 +795,11 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
        l.addWidget(gb, 0, 0, 1, 1)
        gb.setLayout(tl)

        self.button_box.addButton(self.fetch_metadata_button,
                QDialogButtonBox.ActionRole)
        self.config_metadata_button.setToolButtonStyle(Qt.ToolButtonTextOnly)
        self.config_metadata_button.setText(_('Configure metadata downloading'))
        self.button_box.addButton(self.config_metadata_button,
                QDialogButtonBox.ActionRole)
        sto(self.button_box, self.title)
        self.button_box_layout.insertWidget(1, self.fetch_metadata_button)
        self.button_box_layout.insertWidget(2, self.config_metadata_button)
        sto(self.button_box, self.fetch_metadata_button)
        sto(self.fetch_metadata_button, self.config_metadata_button)
        sto(self.config_metadata_button, self.title)

        def create_row(row, widget, tab_to, button=None, icon=None, span=1):
            ql = BuddyLabel(widget)

@ -774,6 +817,8 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
            sto(widget, tab_to)

        tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
        tl.addWidget(self.manage_authors_button, 2, 0, 2, 1)
        tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)

        create_row(0, self.title, self.title_sort,
                   button=self.deduce_title_sort_button, span=2,

@ -795,6 +840,9 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
                   button=self.timestamp.clear_button, icon='trash.png')
        create_row(11, self.identifiers, self.comments,
                   button=self.clear_identifiers_button, icon='trash.png')
        sto(self.clear_identifiers_button, self.swap_title_author_button)
        sto(self.swap_title_author_button, self.manage_authors_button)
        sto(self.manage_authors_button, self.paste_isbn_button)
        tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
                   12, 1, 1 ,1)

@ -814,7 +862,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
        l.addWidget(gb, 0, 1, 1, 1)
        sp = QSizePolicy()
        sp.setVerticalStretch(10)
        sp.setHorizontalPolicy(QSizePolicy.Fixed)
        sp.setHorizontalPolicy(QSizePolicy.Minimum)
        sp.setVerticalPolicy(QSizePolicy.Expanding)
        gb.setSizePolicy(sp)
        self.set_custom_metadata_tab_order()

@ -836,7 +884,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
        lb = QGridLayout()
        gb.setLayout(lb)
        lb.addWidget(self.cover, 0, 0, 1, 3, alignment=Qt.AlignCenter)
        sto(self.clear_identifiers_button, self.cover.buttons[0])
        sto(self.manage_authors_button, self.cover.buttons[0])
        for i, b in enumerate(self.cover.buttons[:3]):
            lb.addWidget(b, 1, i, 1, 1)
            sto(b, self.cover.buttons[i+1])

@ -161,7 +161,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):

    def initialize(self):
        ConfigWidgetBase.initialize(self)
        self.current_font = self.initial_font = gprefs['font']
        font = gprefs['font']
        if font is not None:
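            # gprefs['font'] stores (family, size, weight, italic); the
            # stretch value lives in a separate pref, so append it here.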
            font = list(font)
            font.append(gprefs.get('font_stretch', QFont.Unstretched))
        self.current_font = self.initial_font = font
        self.update_font_display()
        self.display_model.initialize()

@ -178,7 +182,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
    def build_font_obj(self):
        font_info = self.current_font
        if font_info is not None:
            font = QFont(*font_info)
            font = QFont(*(font_info[:4]))
            font.setStretch(font_info[4])
        else:
            font = qt_app.original_font
        return font

@ -215,15 +220,18 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        if fd.exec_() == fd.Accepted:
            font = fd.selectedFont()
            fi = QFontInfo(font)
            self.current_font = (unicode(fi.family()), fi.pointSize(),
                    fi.weight(), fi.italic())
            self.current_font = [unicode(fi.family()), fi.pointSize(),
                    fi.weight(), fi.italic(), font.stretch()]
            self.update_font_display()
            self.changed_signal.emit()

    def commit(self, *args):
        rr = ConfigWidgetBase.commit(self, *args)
        if self.current_font != self.initial_font:
            gprefs['font'] = self.current_font
            gprefs['font'] = (self.current_font[:4] if self.current_font else
                    None)
            gprefs['font_stretch'] = (self.current_font[4] if self.current_font
                    is not None else QFont.Unstretched)
            QApplication.setFont(self.font_display.font())
            rr = True
        self.display_model.commit()

@ -71,9 +71,10 @@ class SourcesModel(QAbstractTableModel): # {{{
                    plugin.is_configured()):
                return QIcon(I('list_remove.png'))
        elif role == Qt.ToolTipRole:
            base = plugin.description + '\n\n'
            if plugin.is_configured():
                return _('This source is configured and ready to go')
            return _('This source needs configuration')
                return base + _('This source is configured and ready to go')
            return base + _('This source needs configuration')
        return NONE

    def setData(self, index, val, role):

@ -75,6 +75,8 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{

    def find(self, query):
        query = query.strip()
        if not query:
            return QModelIndex()
        matches = self.parse(query)
        if not matches:
            return QModelIndex()

@ -87,6 +89,8 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{

    def find_next(self, idx, query, backwards=False):
        query = query.strip()
        if not query:
            return idx
        matches = self.parse(query)
        if not matches:
            return idx

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
class StorePlugin(object): # {{{
    '''
    A plugin representing an online ebook repository (store). The store can
    be a comercial store that sells ebooks or a source of free downloadable
    be a commercial store that sells ebooks or a source of free downloadable
    ebooks.

    Note that this class is the base class for these plugins, however, to

@ -43,6 +43,8 @@ class StorePlugin(object): # {{{
    The easiest way to handle affiliate money payouts is to randomly select
    between the author's affiliate id and calibre's affiliate id so that
    70% of the time the author's id is used.

    See declined.txt for a list of stores that do not want to be included.
    '''

    def __init__(self, gui, name):

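A minimal sketch of the weighted choice that docstring describes, assuming
hypothetical names (pick_affiliate_id and its arguments are illustrative
only, not part of this patch):

    import random

    def pick_affiliate_id(author_id, calibre_id):
        # Use the author's affiliate id about 70% of the time and
        # calibre's for the remaining 30%.
        return author_id if random.random() < 0.7 else calibre_id
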
src/calibre/gui2/store/beam_ebooks_de_plugin.py (new file, 92 lines)
@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import urllib2
from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog

class BeamEBooksDEStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://klick.affiliwelt.net/klick.php?bannerid=10072&pid=32307&prid=908'
        url_details = ('http://klick.affiliwelt.net/klick.php?'
                       'bannerid=10730&pid=32307&prid=908&prodid={0}')

        if external or self.config.get('open_external', False):
            if detail_item:
                url = url_details.format(detail_item)
            open_url(QUrl(url))
        else:
            detail_url = None
            if detail_item:
                detail_url = url_details.format(detail_item)
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.beam-ebooks.de/suchergebnis.php?Type=&sw=' + urllib2.quote(query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//table[tr/td/div[@class="stil2"]]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/@href')).strip()
                if not id:
                    continue
                id = id[7:]
                cover_url = ''.join(data.xpath('./tr/td[1]/a/img/@src'))
                if cover_url:
                    cover_url = 'http://www.beam-ebooks.de' + cover_url
                title = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/b/text()'))
                author = ' '.join(data.xpath('./tr/td/div[@class="stil2"]/'
                                             'child::b/text()'
                                             '|'
                                             './tr/td/div[@class="stil2"]/'
                                             'child::strong/text()'))
                price = ''.join(data.xpath('./tr/td[3]/text()'))
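                # Format availability is inferred from the alt text of the
                # format icons in the results table.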
                pdf = data.xpath(
                        'boolean(./tr/td[3]/a/img[contains(@alt, "PDF")]/@alt)')
                epub = data.xpath(
                        'boolean(./tr/td[3]/a/img[contains(@alt, "ePub")]/@alt)')
                mobi = data.xpath(
                        'boolean(./tr/td[3]/a/img[contains(@alt, "Mobipocket")]/@alt)')
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id
                formats = []
                if epub:
                    formats.append('ePub')
                if pdf:
                    formats.append('PDF')
                if mobi:
                    formats.append('MOBI')
                s.formats = ', '.join(formats)

                yield s

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

import random
import re
import urllib2
import urllib
from contextlib import closing

from lxml import html

@ -48,7 +48,7 @@ class BNStore(BasicStoreConfig, StorePlugin):

    def search(self, query, max_results=10, timeout=60):
        url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results
        url += urllib2.quote(query)
        url += urllib.quote_plus(query)

        br = browser()

src/calibre/gui2/store/declined.txt (new file, 5 lines)
@ -0,0 +1,5 @@
This is a list of stores that objected, declined
or asked not to be included in the store integration.

* Borders (http://www.borders.com/)
* WH Smith (http://www.whsmith.co.uk/)

src/calibre/gui2/store/epubbuy_de_plugin.py (new file, 80 lines)
@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import urllib2
from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog

class EPubBuyDEStore(BasicStoreConfig, StorePlugin):

    def open(self, parent=None, detail_item=None, external=False):
        url = 'http://klick.affiliwelt.net/klick.php?bannerid=47653&pid=32307&prid=2627'
        url_details = ('http://klick.affiliwelt.net/klick.php?bannerid=47653'
                       '&pid=32307&prid=2627&prodid={0}')

        if external or self.config.get('open_external', False):
            if detail_item:
                url = url_details.format(detail_item)
            open_url(QUrl(url))
        else:
            detail_url = None
            if detail_item:
                detail_url = url_details.format(detail_item)
            d = WebStoreDialog(self.gui, url, parent, detail_url)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.epubbuy.com/search.php?search_query=' + urllib2.quote(query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[contains(@class, "ajax_block_product")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./div[@class="center_block"]'
                                        '/p[contains(text(), "artnr:")]/text()')).strip()
                if not id:
                    continue
                id = id[6:].strip()
                if not id:
                    continue
                cover_url = ''.join(data.xpath('./div[@class="center_block"]'
                                               '/a[@class="product_img_link"]/img/@src'))
                if cover_url:
                    cover_url = 'http://www.epubbuy.com' + cover_url
                title = ''.join(data.xpath('./div[@class="center_block"]'
                                           '/a[@class="product_img_link"]/@title'))
                author = ''.join(data.xpath('./div[@class="center_block"]/a[2]/text()'))
                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id
                s.formats = 'ePub'

                yield s

@ -73,6 +73,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
                s.price = price
                s.detail_item = id
                s.drm = SearchResult.DRM_LOCKED
                s.formats = 'EPUB'
                s.formats = 'ePub'

                yield s

@ -12,6 +12,7 @@ from threading import Thread
from Queue import Queue

from calibre import browser
from calibre.constants import DEBUG
from calibre.utils.magick.draw import thumbnail

class GenericDownloadThreadPool(object):

@ -119,7 +120,8 @@ class SearchThread(Thread):
                    self.results.put((res, store_plugin))
                self.tasks.task_done()
            except:
                traceback.print_exc()
                if DEBUG:
                    traceback.print_exc()


class CoverThreadPool(GenericDownloadThreadPool):

@ -157,7 +159,8 @@ class CoverThread(Thread):
                        callback()
                self.tasks.task_done()
            except:
                continue
                if DEBUG:
                    traceback.print_exc()


class DetailsThreadPool(GenericDownloadThreadPool):

@ -191,7 +194,8 @@ class DetailsThread(Thread):
                    callback(result)
                self.tasks.task_done()
            except:
                continue
                if DEBUG:
                    traceback.print_exc()


class CacheUpdateThreadPool(GenericDownloadThreadPool):

@ -221,4 +225,5 @@ class CacheUpdateThread(Thread):
                store_plugin, timeout = self.tasks.get()
                store_plugin.update_cache(timeout=timeout, suppress_progress=True)
            except:
                traceback.print_exc()
                if DEBUG:
                    traceback.print_exc()

@ -23,8 +23,8 @@ TIMEOUT = 75 # seconds

class SearchDialog(QDialog, Ui_Dialog):

    def __init__(self, istores, *args):
        QDialog.__init__(self, *args)
    def __init__(self, istores, parent=None, query=''):
        QDialog.__init__(self, parent)
        self.setupUi(self)

        self.config = JSONConfig('store/search')

@ -47,13 +47,16 @@ class SearchDialog(QDialog, Ui_Dialog):
        # per search basis.
        stores_group_layout = QVBoxLayout()
        self.stores_group.setLayout(stores_group_layout)
        for x in self.store_plugins:
        for x in sorted(self.store_plugins.keys(), key=lambda x: x.lower()):
            cbox = QCheckBox(x)
            cbox.setChecked(True)
            stores_group_layout.addWidget(cbox)
            setattr(self, 'store_check_' + x, cbox)
        stores_group_layout.addStretch()

        # Set the search query
        self.search_edit.setText(query)

        # Create and add the progress indicator
        self.pi = ProgressIndicator(self, 24)
        self.top_layout.addWidget(self.pi)

@ -93,7 +96,7 @@ class SearchDialog(QDialog, Ui_Dialog):
        # Store / Formats
        self.results_view.setColumnWidth(4, int(total*.25))

    def do_search(self, checked=False):
    def do_search(self):
        # Stop all running threads.
        self.checker.stop()
        self.search_pool.abort()

@ -136,14 +139,17 @@ class SearchDialog(QDialog, Ui_Dialog):
        query = query.replace('>', '')
        query = query.replace('<', '')
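        # e.g. author:"John Doe" title:Dune is reduced to: John Doe Dune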
        # Remove the prefix.
        for loc in ( 'all', 'author', 'authors', 'title'):
            query = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', query)
        for loc in ('all', 'author', 'authors', 'title'):
            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
            query = query.replace('%s:' % loc, '')
        # Remove the prefix and search text.
        for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
            query = re.sub(r'%s:"[^"]"' % loc, '', query)
            query = re.sub(r'%s:[^\s]*' % loc, '', query)
        # Remove logic.
        query = re.sub(r'(^|\s)(and|not|or)(\s|$)', ' ', query)
        query = re.sub(r'(^|\s)(and|not|or|a|the|is|of)(\s|$)', ' ', query)
        # Remove "
        query = query.replace('"', '')
        # Remove excess whitespace.
        query = re.sub(r'\s{2,}', ' ', query)
        query = query.strip()

@ -252,4 +258,9 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.search_pool.abort()
        self.cache_pool.abort()
        self.save_state()

    def exec_(self):
        if unicode(self.search_edit.text()).strip():
            self.do_search()
        return QDialog.exec_(self)

@ -76,7 +76,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
                s.detail_item = id
                formats = []
                if epub:
                    formats.append('EPUB')
                    formats.append('ePub')
                if pdf:
                    formats.append('PDF')
                s.formats = ', '.join(formats)