Sync to trunk.
111
Changelog.yaml
@ -19,6 +19,117 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.7.49
|
||||
date: 2011-03-11
|
||||
|
||||
new features:
|
||||
- title: "News download: More flexible news download scheduling. You can now schedule by days of the week, days of the month and an interval, which can be as small as an hour for news sources that change rapidly"
|
||||
|
||||
- title: "Improved support for dragging and dropping cover images directly from web browsers into calibre."
|
||||
description: >
|
||||
"You can drop the images onto the cover in calibre and it will be replaced. Tested on a number of OS/browser combinations, but I am sure there are still a few for which it won't work."
|
||||
|
||||
- title: "Add shortcuts of Alt+Left and Alt+Right for the next and previous buttons in the edit metadata dialog."
|
||||
tickets: [9360]
|
||||
|
||||
- title: "When adding a GUI plugin, prompt the user for where the plugin should be displayed"
|
||||
|
||||
- title: "Conversion: When using the Level x Table of Contents options, support the case when the level 1,2,3 items are spread over multiple HTML files."
|
||||
|
||||
- title: "Support for the Optimus V"
|
||||
|
||||
- title: "FB2 Input: Support for tables"
|
||||
tickets: [9302]
|
||||
|
||||
- title: "Display a checkmark/cross next to 'true' and 'false' items in custom columns. Controlled via Preferences->Add a custom column"
|
||||
|
||||
- title: "Catalog generation: Reuse cover from existing catalog, allows the use of a custom cover for catalogs"
|
||||
|
||||
- title: "When setting covers in calibre, resize to fit within a maximum size of (1200, 1600), to prevent slowdowns due to extra large covers. This size can be controlled via Preferences->Tweaks."
|
||||
tickets: [9277]
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix long-standing bug that caused errors when saving books to disk if the book metadata has certain Chinese/Russian characters on Windows. The fix required some changes to how unicode paths are handled in calibre, so it might have broken something else. If so, please open a ticket."
|
||||
tickets: [7250]
|
||||
|
||||
- title: "Custom recipes: Store custom recipes in the calibre config directory instead of the library database. This allows scheduling of custom recipes to work with multiple libraries. Note that you may have to re-schedule any existing custom recipes."
|
||||
|
||||
- title: "Restore the ability to do search and replace on ISBN. Use the 'identifiers' field with type isbn to do this"
|
||||
|
||||
- title: "Fix amazon metadata download plugin not working with ISBN-13 and social metadata not downloading if the supplied ISBN 10 is not for an edition available on Amazon"
|
||||
|
||||
- title: "Workaround for openlibrary blocking the user agent used by calibre, preventing cover downloads from that site"
|
||||
|
||||
- title: "FB2 Output: Add sequence to metadata. Fix bugs with author names. Fix bug where <empty-line/> elements were put inside <p> tags."
|
||||
|
||||
- title: "Conversion pipeline: If the input HTML document uses uppercase tag and attribute names, convert them to lowercase"
|
||||
|
||||
- title: "RTF Input: Fix space after unicode quote character being incorrectly removed"
|
||||
tickets: [9343]
|
||||
|
||||
- title: "Fix regression that broke the ebook-device command line program in the previous release"
|
||||
|
||||
- title: "Fix custom columns with numbers not allowing entry of positive numbers on 64-bit machines"
|
||||
tickets: [9283]
|
||||
|
||||
- title: "Fix regression that caused focus to be lost when editing metadata in the device view"
|
||||
tickets: [9323]
|
||||
|
||||
- title: "CHM Input: If an input encoding is specified, use it rather than trying to detect the encoding of the text in the CHM file."
|
||||
tickets: [9173]
|
||||
|
||||
- title: "Fix regression that caused the viewer to forget its window size and other attributes when launched from within calibre, after calibre is restarted."
|
||||
tickets: [9326]
|
||||
|
||||
- title: "News download: Fix regression that caused the delay parameter in recipes to not actually delay downloads."
|
||||
tickets: [9332]
|
||||
|
||||
- title: "Conversion pipeline: When converting the :first-letter pseudo CSS selector to a <span> follow W3C rules for handling leading punctuation characters."
|
||||
tickets: [9319]
|
||||
|
||||
- title: "Fix regression that caused clicking saved searches in the Tag Browser to not work"
|
||||
|
||||
- title: "Comic Input: Fix conversion failing when output profile is set to Tablet Output"
|
||||
|
||||
- title: "Replace leading periods in all path components generated by calibre with underscores"
|
||||
|
||||
- title: "Search and replace preferences: Prevent very long strings from causing the wizard button to get pushed off the screen"
|
||||
|
||||
- title: "Content server: Fix regression that caused various metadata to be missing in the book details view."
|
||||
tickets: [8929]
|
||||
|
||||
- title: "Apple driver: Ignore invalid EPUBs when sending to iTunes"
|
||||
|
||||
improved recipes:
|
||||
- golem.de
|
||||
- gulli.de
|
||||
- La Nacion
|
||||
- Ming Pao
|
||||
- evz.ro
|
||||
- Kompiuterra
|
||||
- NRC Handelsblad (EPUB)
|
||||
- The Leduc - Wetaskiwin Pipestone Flyer
|
||||
|
||||
new recipes:
|
||||
- title: "Various Romanian news sources"
|
||||
author: Silviu Cotoara
|
||||
|
||||
- title: "Salt Lake City Tribune"
|
||||
author: Charles Holbert
|
||||
|
||||
- title: "Bay Citizen and Oakland North"
|
||||
author: noah
|
||||
|
||||
- title: "Nikkei Business and JB Press"
|
||||
author: Ado Nishimura
|
||||
|
||||
- title: "El Pais Babelia"
|
||||
author: oneillpt
|
||||
|
||||
- title: "Komchadluek"
|
||||
author: ballsai
|
||||
|
||||
|
||||
- version: 0.7.48
|
||||
date: 2011-03-04
|
||||
|
||||
|
BIN
resources/images/news/avantaje.png
Normal file
After Width: | Height: | Size: 924 B |
BIN
resources/images/news/cotidianul.png
Normal file
After Width: | Height: | Size: 495 B |
BIN
resources/images/news/ele.png
Normal file
After Width: | Height: | Size: 414 B |
BIN
resources/images/news/felicia.png
Normal file
After Width: | Height: | Size: 840 B |
BIN
resources/images/news/financiarul.png
Normal file
After Width: | Height: | Size: 302 B |
BIN
resources/images/news/hitro.png
Normal file
After Width: | Height: | Size: 521 B |
BIN
resources/images/news/imperatortravel.png
Normal file
After Width: | Height: | Size: 556 B |
BIN
resources/images/news/kamikaze.png
Normal file
After Width: | Height: | Size: 262 B |
BIN
resources/images/news/kompiutierra.png
Normal file
After Width: | Height: | Size: 654 B |
BIN
resources/images/news/monden.png
Normal file
After Width: | Height: | Size: 437 B |
BIN
resources/images/news/onemagazine.png
Normal file
After Width: | Height: | Size: 316 B |
BIN
resources/images/news/pcworldro.png
Normal file
After Width: | Height: | Size: 386 B |
BIN
resources/images/news/promotor.png
Normal file
After Width: | Height: | Size: 728 B |
BIN
resources/images/news/protvmagazin.png
Normal file
After Width: | Height: | Size: 251 B |
BIN
resources/images/news/psychologies.png
Normal file
After Width: | Height: | Size: 750 B |
BIN
resources/images/news/publika.png
Normal file
After Width: | Height: | Size: 290 B |
BIN
resources/images/news/rbc_ru.png
Normal file
After Width: | Height: | Size: 371 B |
BIN
resources/images/news/timesnewroman.png
Normal file
After Width: | Height: | Size: 494 B |
BIN
resources/images/news/trombon.png
Normal file
After Width: | Height: | Size: 375 B |
BIN
resources/images/news/tvmania.png
Normal file
After Width: | Height: | Size: 379 B |
BIN
resources/images/news/viva.png
Normal file
After Width: | Height: | Size: 747 B |
BIN
resources/images/news/wallstreetro.png
Normal file
After Width: | Height: | Size: 768 B |
57
resources/recipes/avantaje.recipe
Normal file
@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
avantaje.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Avantaje(BasicNewsRecipe):
    """News recipe for avantaje.ro, fed from its FeedBurner feed."""

    # --- periodical metadata -------------------------------------------
    title = u'Avantaje'
    __author__ = u'Silviu Cotoar\u0103'
    description = u''
    publisher = u'Avantaje'
    category = 'Ziare,Reviste,Stiri'
    language = 'ro'
    cover_url = 'http://www.avantaje.ro/images/default/logo.gif'

    # --- download settings ---------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the values above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher
    )

    # --- article clean-up rules ----------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'articol'}},
        {'name': 'div', 'attrs': {'class': 'gallery clearfix'}},
        {'name': 'div', 'attrs': {'align': 'justify'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['color_sanatate_box']}},
        {'name': 'div', 'attrs': {'class': ['nav']}},
        {'name': 'div', 'attrs': {'class': ['voteaza_art']}},
        {'name': 'div', 'attrs': {'class': ['bookmark']}},
        {'name': 'div', 'attrs': {'class': ['links clearfix']}},
        {'name': 'div', 'attrs': {'class': ['title']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['title']}},
    ]

    feeds = [
        (u'Feeds', u'http://feeds.feedburner.com/Avantaje'),
    ]

    def preprocess_html(self, soup):
        """Run the parsed article through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
46
resources/recipes/bay_citizen.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheBayCitizen(BasicNewsRecipe):
    """News recipe for The Bay Citizen that merges multi-page stories.

    Articles come from the main stories feed. When a story links to a
    follow-up page (an ``a.stry-next`` anchor), the body of that page is
    fetched and inserted into the first page before conversion.
    """

    title = 'The Bay Citizen'
    language = 'en'
    __author__ = 'noah'
    description = 'The Bay Citizen'
    publisher = 'The Bay Citizen'
    # Site root; follow-up page hrefs are appended to it in append_page().
    INDEX = u'http://www.baycitizen.org'
    category = 'news'
    oldest_article = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    masthead_url = 'http://media.baycitizen.org/images/layout/logo1.png'
    feeds = [('Main Feed', 'http://www.baycitizen.org/feeds/stories/')]
    keep_only_tags = [dict(name='div', attrs={'class':'story'})]
    remove_tags = [
        dict(name='div', attrs={'class':'socialBar'}),
        dict(name='div', attrs={'id':'text-resize'}),
        dict(name='div', attrs={'class':'story relatedContent'}),
        dict(name='div', attrs={'id':'comment_status_loading'}),
    ]

    def append_page(self, soup, appendtag, position):
        """Fetch the next page of a story and splice its body into *appendtag*.

        soup      -- parsed page that may contain an ``a.stry-next`` link
        appendtag -- tag that receives the follow-up page's ``div.body``
        position  -- child index at which the body is inserted

        Recurses so that every further page is nested into the body of the
        page before it. Returns None; the work is done by mutating
        *appendtag*.
        """
        pager = soup.find('a', attrs={'class':'stry-next'})
        if not pager:
            return
        nexturl = self.INDEX + pager['href']
        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class':'body'})
        if texttag is None:
            # The follow-up page has no body container: keep what we
            # already have instead of failing the whole article with an
            # AttributeError on None.
            return
        for styled in texttag.findAll(style=True):
            del styled['style']
        # Pull in any further pages first so they end up inside texttag.
        self.append_page(soup2, texttag, len(texttag.contents))
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Strip inline styles, merge follow-up pages and drop pager markup."""
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        # Plain loops: the extract() calls are wanted for their side
        # effect only, so no throwaway list is built.
        for trash in soup.findAll(id='story-pagination'):
            trash.extract()
        for trash in soup.findAll('em', 'cont-from-prev'):
            trash.extract()
        return soup
|
69
resources/recipes/cotidianul.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
cotidianul.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Cotidianul(BasicNewsRecipe):
    """News recipe for cotidianul.ro, fed from its all-stories RSS feed."""

    # --- periodical metadata -------------------------------------------
    title = u'Cotidianul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u''
    publisher = u'Cotidianul'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.cotidianul.ro/images/cotidianul.png'

    # --- download settings ---------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the values above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher
    )

    # Stylesheet applied to the downloaded articles.
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # --- article clean-up rules ----------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'titlu'}},
        {'name': 'div', 'attrs': {'class': 'gallery clearfix'}},
        {'name': 'div', 'attrs': {'align': 'justify'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['space']}},
        {'name': 'div', 'attrs': {'id': ['title_desc']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['space']}},
        {'name': 'span', 'attrs': {'class': ['date']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.cotidianul.ro/rssfeed/ToateStirile.xml'),
    ]

    def preprocess_html(self, soup):
        """Run the parsed article through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
@ -24,7 +24,7 @@ class Economist(BasicNewsRecipe):
|
||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info', 'share_inline_header']}),
|
||||
{'class': lambda x: x and 'share-links-header' in x},
|
||||
]
|
||||
keep_only_tags = [dict(id='ec-article-body')]
|
||||
|
@ -18,7 +18,8 @@ class Economist(BasicNewsRecipe):
|
||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info']}),
|
||||
dict(attrs={'class':['dblClkTrk', 'ec-article-info',
|
||||
'share_inline_header']}),
|
||||
{'class': lambda x: x and 'share-links-header' in x},
|
||||
]
|
||||
keep_only_tags = [dict(id='ec-article-body')]
|
||||
|
49
resources/recipes/el_pais_babelia.recipe
Normal file
@ -0,0 +1,49 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElPaisBabelia(BasicNewsRecipe):
    """Recipe for the Babelia supplement of El Pais.

    There is no feed list: ``parse_index`` scrapes the supplement's index
    page and builds one feed per ``div.contenedor_nuevo`` section.
    """

    title = 'El Pais Babelia'
    __author__ = 'oneillpt'
    description = 'El Pais Babelia'
    INDEX = 'http://www.elpais.com/suple/babelia/'
    language = 'es'

    remove_tags_before = dict(name='div', attrs={'class':'estructura_2col'})
    # NOTE(review): BasicNewsRecipe subclasses normally spell this attribute
    # ``keep_only_tags``; ``keep_tags`` is presumably ignored by the
    # framework. Confirm before renaming -- enabling it changes the output.
    keep_tags = [dict(name='div', attrs={'class':'estructura_2col'})]
    remove_tags = [
        dict(name='div', attrs={'class':'votos estirar'}),
        dict(name='div', attrs={'id':'utilidades'}),
        dict(name='div', attrs={'class':'info_relacionada'}),
        dict(name='div', attrs={'class':'mod_apoyo'}),
        dict(name='div', attrs={'class':'contorno_f'}),
        dict(name='div', attrs={'class':'pestanias'}),
        dict(name='div', attrs={'class':'otros_webs'}),
        dict(name='div', attrs={'id':'pie'}),
    ]
    #no_stylesheets = True
    remove_javascript = True

    def parse_index(self):
        """Build the feed list from the Babelia index page.

        Returns a list of ``(section_title, articles)`` tuples, where each
        article is a dict with ``title`` and ``url`` keys. Only
        site-relative links (href starting with ``/``) whose anchor carries
        a non-empty ``class`` attribute are kept. Sections without any such
        link are omitted.
        """
        feeds = []
        soup = self.index_to_soup(self.INDEX)
        for section in soup.findAll('div', attrs={'class':'contenedor_nuevo'}):
            section_title = self.tag_to_string(section.find('h1'))
            articles = []
            for post in section.findAll('a', href=True):
                url = post['href']
                if not url.startswith('/'):
                    continue
                url = 'http://www.elpais.es'+url
                title = self.tag_to_string(post)
                # Ask the anchor itself for its class. Searching the
                # serialized markup for 'class=' also matched child tags
                # and href text, after which post['class'] raised KeyError.
                klass = post.get('class')
                if not klass:
                    continue
                self.log()
                self.log('--> post: ', post)
                self.log('--> url: ', url)
                self.log('--> title: ', title)
                self.log('--> class: ', klass)
                articles.append({'title':title, 'url':url})
            if articles:
                feeds.append((section_title, articles))
        return feeds
|
||||
|
58
resources/recipes/ele.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
ele.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ele(BasicNewsRecipe):
    """News recipe for ele.ro, fed from its "must read" RSS feed."""

    # --- periodical metadata -------------------------------------------
    title = u'Ele'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Dezv\u0103luie ceea ce e\u015fti'
    publisher = u'Ele'
    category = 'Ziare,Femei'
    language = 'ro'
    cover_url = 'http://www.tripmedia.ro/tripadmin/photos/logo_ele_mare.jpg'

    # --- download settings ---------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the values above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher
    )

    # Stylesheet applied to the downloaded articles.
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # --- article clean-up rules ----------------------------------------
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'article_title'}},
        {'name': 'div', 'attrs': {'class': 'article_text'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.ele.ro/rss_must_read'),
    ]

    def preprocess_html(self, soup):
        """Run the parsed article through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,52 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
evz.ro
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EVZ_Ro(BasicNewsRecipe):
|
||||
title = 'evz.ro'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Romania'
|
||||
publisher = 'evz.ro'
|
||||
category = 'news, politics, Romania'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
class EvenimentulZilei(BasicNewsRecipe):
|
||||
title = u'Evenimentul Zilei'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = ''
|
||||
publisher = u'Evenimentul Zilei'
|
||||
oldest_article = 5
|
||||
language = 'ro'
|
||||
masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif'
|
||||
extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} '
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Stiri'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.evz.ro/fileadmin/images/evzLogo.png'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<head>.*?<title>', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>')
|
||||
,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
|
||||
]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'single'})
|
||||
, dict(name='img', attrs={'id':'placeholder'})
|
||||
, dict(name='a', attrs={'id':'holderlink'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['form','embed','iframe','object','base','link','script','noscript'])
|
||||
,dict(attrs={'class':['section','statsInfo','email il']})
|
||||
,dict(attrs={'id' :'gallery'})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='p', attrs={'class':['articleInfo']})
|
||||
, dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
|
||||
, dict(name='div', attrs={'id':['bannerAddoceansArticle']})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(attrs={'class':'section'})
|
||||
keep_only_tags = [dict(attrs={'class':'single'})]
|
||||
remove_attributes = ['height','width']
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':['bannerAddoceansArticleJos']})
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')]
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.evz.ro/rss.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
return self.adeify_images(soup)
|
||||
|
48
resources/recipes/felicia.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
revistafelicia.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Felicia(BasicNewsRecipe):
    """News recipe for revistafelicia.ro, fed from the site's RSS feed."""

    # --- periodical metadata -------------------------------------------
    title = u'Revista Felicia'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'O revist\u0103 pentru sufletul t\u0103u'
    publisher = u'Revista Felicia'
    category = 'Ziare,Reviste'
    language = 'ro'
    cover_url = 'http://www.3waves.net/uploads/image/logo-revista-felicia_03.jpg'

    # --- download settings ---------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the values above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher
    )

    # --- article clean-up rules ----------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'header'}},
        {'name': 'div', 'attrs': {'id': 'contentArticol'}},
    ]

    remove_tags = [
        {'name': 'img', 'attrs': {'src': ['http://www.revistafelicia.ro/templates/default/images/hdr_ultimul_nr.jpg']}},
        {'name': 'div', 'attrs': {'class': ['content']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.revistafelicia.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Run the parsed article through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
55
resources/recipes/financiarul.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
financiarul.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Financiarul(BasicNewsRecipe):
    """News recipe for financiarul.com, fed from the site's RSS feed."""

    # --- periodical metadata -------------------------------------------
    title = u'Financiarul'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'FIN.ro'
    publisher = u'Financiarul'
    category = 'Ziare,Stiri'
    language = 'ro'
    cover_url = 'http://www.financiarul.com/templates/default/images/logo.png'

    # --- download settings ---------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata handed to the conversion step; mirrors the values above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher
    )

    # --- article clean-up rules ----------------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'col2ContentLeftL'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['infoArticol']}},
        {'name': 'ul', 'attrs': {'class': 'navSectiuni'}},
        {'name': 'div', 'attrs': {'class': 'separator separatorTop'}},
        {'name': 'div', 'attrs': {'class': 'infoArticol infoArticolBottom'}},
        {'name': 'ul', 'attrs': {'class': ['related']}},
        {'name': 'div', 'attrs': {'class': ['slot panel300 panelGri300 panelGri300s panelGri300sm']}},
    ]

    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': ['related']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.financiarul.com/rss'),
    ]

    def preprocess_html(self, soup):
        """Run the parsed article through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
@ -1,17 +1,83 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
#!/usr/bin/env python
|
||||
|
||||
class AdvancedUserRecipe1257093338(BasicNewsRecipe):
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class golem_ger(BasicNewsRecipe):
|
||||
title = u'Golem.de'
|
||||
language = 'de'
|
||||
__author__ = 'Kovid Goyal'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = 'de'
|
||||
lang = 'de-DE'
|
||||
no_stylesheets = True
|
||||
encoding = 'iso-8859-1'
|
||||
recursions = 1
|
||||
match_regexps = [r'http://www.golem.de/.*.html']
|
||||
|
||||
feeds = [(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0')]
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'artikelhead'}),
|
||||
dict(name='p', attrs={'class':'teaser'}),
|
||||
dict(name='div', attrs={'class':'artikeltext'}),
|
||||
dict(name='h2', attrs={'id':'artikelhead'}),
|
||||
]
|
||||
|
||||
def print_version(self, url):
    """Map an article URL to the matching golem.de print-view URL.

    The article id is taken to be the last path component of *url* with
    its final five characters (a ``.html`` suffix) cut off.
    """
    article_id = url[url.rfind('/') + 1:-5]
    return 'http://www.golem.de/' + 'print.php?a=' + article_id
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
|
||||
dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
|
||||
dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
|
||||
dict(name='td', attrs={'class':['xsmall']}),
|
||||
]
|
||||
|
||||
|
||||
# remove_tags_after = [
|
||||
# dict(name='div', attrs={'id':['contentad2']})
|
||||
# ]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
|
||||
(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
|
||||
(u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
|
||||
(u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
|
||||
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
|
||||
(u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
|
||||
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
|
||||
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
|
||||
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
|
||||
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
|
||||
(u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
|
||||
(u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
|
||||
(u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
|
||||
(u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
|
||||
(u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
|
||||
(u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
|
||||
(u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
|
||||
(u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0')
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
|
||||
(u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=feed=RSS2.0'),
|
||||
(u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
|
||||
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
|
||||
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
|
||||
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
|
||||
]
|
||||
|
||||
|
||||
extra_css = '''
|
||||
h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
|
||||
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
|
||||
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
|
||||
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
|
||||
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
|
||||
.teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
|
||||
.xsmall{font-style:italic;font-size:x-small;}
|
||||
.td{font-style:italic;font-size:x-small;}
|
||||
img {align:left;}
|
||||
'''
|
||||
|
@ -11,6 +11,26 @@ class AdvancedUserRecipe1259599587(BasicNewsRecipe):
|
||||
|
||||
feeds = [(u'gulli:news', u'http://ticker.gulli.com/rss/')]
|
||||
|
||||
remove_tags = [{'class' : ['addthis_button', 'BreadCrumb']}, {'id' : ['plista0']}]
|
||||
remove_tags = [dict(name='div', attrs={'class':['FloatL','_forumBox']})]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'inside'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['_contentLeft']})]
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class':['_bookmark']})]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
extra_css = '''
|
||||
h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:25px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:22px; }
|
||||
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
|
||||
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
|
||||
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
|
||||
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
|
||||
.newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
|
||||
.articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
|
||||
.byline {color:#666;margin-bottom:0;font-size:12px}
|
||||
.blockquote {color:#030303;font-style:italic;padding-left:15px;}
|
||||
img {align:center;}
|
||||
.li {list-style-type: none}
|
||||
'''
|
||||
|
43
resources/recipes/hitro.recipe
Normal file
@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
hit.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Hit(BasicNewsRecipe):
    """Recipe for hit.ro, fetched through the site's RSS feed."""

    # Identity and catalogue metadata.
    title = u'HIT'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'HIT'
    description = 'IT'
    category = 'Ziare,Reviste,IT'
    language = 'ro'
    cover_url = 'http://www.hit.ro/lib/images/frontend/hit_logo.png'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    feeds = [(u'Feeds', u'http://www.hit.ro/rss')]

    # Keep only the headline and the article body container.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'art_titl'}},
        {'name': 'div', 'attrs': {'id': 'continut_articol'}},
    ]

    # Conversion metadata mirrors the class-level fields defined above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    def preprocess_html(self, soup):
        """Run the parsed page through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
68
resources/recipes/imperatortravel.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
imperatortravel.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Imperatortravel(BasicNewsRecipe):
    """Recipe for imperatortravel.ro, fetched through its FeedBurner feed."""

    # Identity and catalogue metadata.
    title = u'Imperator Travel'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Imperator Travel'
    description = u'C\u0103l\u0103torii'
    category = 'Ziare,Stiri,Turism,Calatorii'
    language = 'ro'
    cover_url = 'http://www.imperatortravel.ro/images/header-1.jpg'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    feeds = [(u'Feeds', u'http://feeds.feedburner.com/ImperatorTravels')]

    # Only the main article container is kept ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article first_main_article'}},
    ]

    # ... and these elements are stripped from it.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['meta']}},
        {'name': 'body', 'attrs': {'class': ['transparent_widget ff3 win Locale_en_US']}},
        {'name': 'div', 'attrs': {'class': ['connect_widget']}},
        {'name': 'ul', 'attrs': {'class': ['similar-posts']}},
    ]

    remove_tags_after = [
        {'name': 'ul', 'attrs': {'class': ['similar-posts']}},
    ]

    # Stylesheet applied to the downloaded articles (site stylesheets are disabled above).
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
.byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
.date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
.story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # Conversion metadata mirrors the class-level fields defined above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    def preprocess_html(self, soup):
        """Run the parsed page through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
42
resources/recipes/jbpress.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
import urllib2
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class JBPress(BasicNewsRecipe):
    """Recipe for JBPress (Japan Business Press) that downloads the print pages.

    The recipe is marked as needing a subscription: when both a username and
    a password are configured, ``get_browser`` submits them through a login
    form before articles are fetched.
    """

    title = u'JBPress'
    language = 'ja'
    description = u'Japan Business Press New articles (using small print version)'
    __author__ = 'Ado Nishimura'
    needs_subscription = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_tags_before = dict(id='wrapper')
    no_stylesheets = True

    feeds = [('JBPress new article', 'http://feed.ismedia.jp/rss/jbpress/all.rdf')]

    def get_cover_url(self):
        """Return the fixed URL of the site logo, used as the cover."""
        return 'http://www.jbpress.co.jp/common/images/v1/jpn/common/logo.gif'

    def get_browser(self):
        """Return a browser, logged in when credentials are available.

        A page is opened first and its response body is then replaced with
        the hand-written form below, so that the form can be selected,
        filled in and submitted through the browser object.
        """
        html = '''<form action="https://jbpress.ismedia.jp/auth/dologin/http://jbpress.ismedia.jp/articles/print/5549" method="post">
<input id="login" name="login" type="text"/>
<input id="password" name="password" type="password"/>
<input id="rememberme" name="rememberme" type="checkbox"/>
</form>
'''
        # NOTE(review): called without an instance, exactly as in the original;
        # confirm this matches the get_browser signature of the targeted calibre.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://jbpress.ismedia.jp/articles/print/5549')
            # Swap the fetched body for the synthetic login form.
            response = br.response()
            response.set_data(html)
            br.set_response(response)
            br.select_form(nr=0)
            br["login"] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def print_version(self, url):
        """Return the print-page URL for the article at *url*.

        The URL is opened only to learn where it finally redirects to; the
        '/-/' segment of that final URL is then replaced with '/print/'.
        """
        response = urllib2.urlopen(url)  # resolve redirect.
        try:
            resolved = response.geturl()
        finally:
            # The body is never read, so release the connection right away.
            response.close()
        return resolved.replace('/-/', '/print/')
|
53
resources/recipes/kamikaze.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
kamikazeonline.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Kamikaze(BasicNewsRecipe):
    """Recipe for kamikazeonline.ro, fetched through the site's feed."""

    # Identity and catalogue metadata.
    title = u'Kamikaze'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'Kamikaze'
    description = u'S\u0103pt\u0103m\u00e2nal sc\u0103pat de sub control'
    category = 'Ziare,Reviste'
    language = 'ro'
    cover_url = 'http://www.kamikazeonline.ro/wp-content/themes/kamikaze/images/kamikazeonline_header.gif'

    # Download settings.
    encoding = 'utf-8'
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    feeds = [(u'Feeds', u'http://www.kamikazeonline.ro/feed/')]

    # Only the content container is kept ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    # ... and these elements are stripped from it.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['connect_confirmation_cell connect_confirmation_cell_no_like']}},
        {'name': 'h3', 'attrs': {'id': ['comments']}},
        {'name': 'ul', 'attrs': {'class': ['addtoany_list']}},
        {'name': 'p', 'attrs': {'class': ['postmetadata']}},
    ]

    remove_tags_after = [
        {'name': 'p', 'attrs': {'class': ['postmetadata']}},
    ]

    # Conversion metadata mirrors the class-level fields defined above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    def preprocess_html(self, soup):
        """Run the parsed page through ``adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed
|
46
resources/recipes/komchadluek.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class KomChadLuek(BasicNewsRecipe):
    """Recipe for the Thai newspaper Kom Chad Luek (komchadluek.net).

    Every section is read from its own RSS feed; each entry in ``feeds`` is
    a (section title, feed URL) pair.
    """

    title = 'KomChadLuek'
    description = 'Komchadluek News'
    __author__ = 'ballsaii and Chotechai'
    __license__ = 'GPL v3'
    publisher = 'Nation Media Group'
    category = 'news, Thai'
    language = 'th'

    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    cover_url = 'http://www.komchadluek.net/images_layout2/komchadluek_headerlogo.png'

    # Keep the <h2> elements and the news detail container.
    keep_only_tags = [
        dict(name='h2'),
        dict(name='div', attrs={'id': 'news_detail_news'}),
    ]

    remove_tags_after = [dict(name='hr')]

    feeds = (
        (u'\u0e01\u0e32\u0e23\u0e40\u0e21\u0e37\u0e2d\u0e07', 'http://www.komchadluek.net/rss/politic.xml'),
        # The sport feed used to carry the "foreign" title, which duplicated the
        # label of foreign.xml below; it is now titled "sport" in Thai.
        (u'\u0e01\u0e35\u0e2c\u0e32', 'http://www.komchadluek.net/rss/sport.xml'),
        (u'\u0e40\u0e01\u0e29\u0e15\u0e23', 'http://www.komchadluek.net/rss/agriculture.xml'),
        (u'\u0e15\u0e48\u0e32\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28', 'http://www.komchadluek.net/rss/foreign.xml'),
        (u'\u0e1a\u0e31\u0e19\u0e40\u0e17\u0e34\u0e07', 'http://www.komchadluek.net/rss/entertainment.xml'),
        (u'\u0e1c\u0e39\u0e49\u0e2b\u0e0d\u0e34\u0e07-\u0e41\u0e1f\u0e0a\u0e31\u0e48\u0e19', 'http://www.komchadluek.net/rss/fashion.xml'),
        (u'\u0e1e\u0e23\u0e30\u0e40\u0e04\u0e23\u0e37\u0e48\u0e2d\u0e07', 'http://www.komchadluek.net/rss/amulet.xml'),
        (u'\u0e20\u0e39\u0e21\u0e34\u0e20\u0e32\u0e04-\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e04\u0e21\u0e17\u0e49\u0e2d\u0e07\u0e16\u0e34\u0e48\u0e19', 'http://www.komchadluek.net/rss/local.xml'),
        (u'\u0e25\u0e38\u0e07\u0e41\u0e08\u0e48\u0e21', 'http://www.komchadluek.net/rss/unclecham.xml'),
        (u'\u0e44\u0e25\u0e1f\u0e4c\u0e2a\u0e44\u0e15\u0e25\u0e4c', 'http://www.komchadluek.net/rss/lifestyle.xml'),
        (u'\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01\u0e34\u0e08-\u0e01\u0e32\u0e23\u0e15\u0e25\u0e32\u0e14', 'http://www.komchadluek.net/rss/economic.xml'),
        (u'\u0e2d\u0e32\u0e2b\u0e32\u0e23', 'http://www.komchadluek.net/rss/food.xml'),
        (u'\u0e04\u0e19\u0e23\u0e31\u0e01\u0e1a\u0e49\u0e32\u0e19-\u0e22\u0e32\u0e19\u0e22\u0e19\u0e15\u0e4c', 'http://www.komchadluek.net/rss/homecar.xml'),
        (u'\u0e14\u0e39\u0e14\u0e27\u0e07-\u0e42\u0e2b\u0e23\u0e32\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c', 'http://www.komchadluek.net/rss/horoscope.xml'),
        (u'\u0e27\u0e34\u0e17\u0e22\u0e4c\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c-\u0e44\u0e2d\u0e17\u0e35', 'http://www.komchadluek.net/rss/scienceit.xml'),
        (u'\u0e28\u0e32\u0e2a\u0e19\u0e32 \u0e28\u0e34\u0e25\u0e1b\u0e30-\u0e27\u0e31\u0e12\u0e19\u0e18\u0e23\u0e23\u0e21 \u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e2a\u0e38\u0e02', 'http://www.komchadluek.net/rss/artculture.xml'),
        (u'\u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29\u0e32', 'http://www.komchadluek.net/rss/education.xml'),
        (u'\u0e1a\u0e17\u0e04\u0e27\u0e32\u0e21', 'http://www.komchadluek.net/rss/article.xml'),
        (u'\u0e2d\u0e32\u0e0a\u0e0d\u0e32\u0e01\u0e23\u0e23\u0e21', 'http://www.komchadluek.net/rss/crime.xml'),
    )
|
@ -1,36 +1,37 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
|
||||
__author__ = 'Vadim Dyadkin'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Computerra(BasicNewsRecipe):
    """Recipe for the Computerra news feed published through FeedBurner."""

    title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
    __author__ = 'Vadim Dyadkin'
    language = 'ru'
    description = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u044b, \u043e\u043a\u043e\u043b\u043e\u043d\u0430\u0443\u0447\u043d\u044b\u0435 \u0438 \u043e\u043a\u043e\u043b\u043e\u0444\u0438\u043b\u043e\u0441\u043e\u0444\u0441\u043a\u0438\u0435 \u0441\u0442\u0430\u0442\u044c\u0438, \u0433\u0430\u0434\u0436\u0435\u0442\u044b.'

    recursion = 50
    oldest_article = 100
    max_articles_per_feed = 100
    use_embedded_content = False
    simultaneous_downloads = 5

    feeds = [
        (u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430', 'http://feeds.feedburner.com/ct_news/'),
    ]

    # Keep the content container, then strip these elements from it.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['fin', 'idc-container', 'idc-noscript']}},
        {'name': 'ul', 'attrs': {'class': "related_post"}},
        {'name': 'p', 'attrs': {'class': 'info'}},
        {'name': 'a', 'attrs': {'rel': 'tag', 'class': 'twitter-share-button', 'type': 'button_count'}},
        {'name': 'h2', 'attrs': {}},
    ]

    extra_css = 'body { text-align: justify; }'

    def get_article_url(self, article):
        """Return the article's 'feedburner:origLink' entry, else its 'guid'."""
        fallback = article.get('guid')
        return article.get('feedburner:origLink', fallback)
|
||||
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Vadim Dyadkin, dyadkin@gmail.com'
|
||||
__author__ = 'Vadim Dyadkin'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Computerra(BasicNewsRecipe):
    """Recipe for the Computerra news feed; articles are fetched as print pages."""

    title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430'
    __author__ = 'Vadim Dyadkin (edited by A. Chewi)'
    language = 'ru'
    description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии'

    oldest_article = 100
    max_articles_per_feed = 50
    use_embedded_content = False
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    feeds = [
        (u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'),
    ]

    # Keep the content container, then strip these elements from it.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['fin', 'idc-container', 'idc-noscript']}},
        {'name': 'ul', 'attrs': {'class': "related_post"}},
        {'name': 'p', 'attrs': {'class': 'info'}},
        {'name': 'a', 'attrs': {'class': 'twitter-share-button'}},
        {'name': 'a', 'attrs': {'type': 'button_count'}},
        {'name': 'h2', 'attrs': {}},
    ]

    def print_version(self, url):
        """Return *url* with the '?print=true' query string appended."""
        print_suffix = '?print=true'
        return url + print_suffix
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
lanacion.com.ar
|
||||
'''
|
||||
@ -17,14 +17,16 @@ class Lanacion(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'es_AR'
|
||||
delay = 14
|
||||
publication_type = 'newspaper'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.lanacion.com.ar/imgs/layout/logos/ln341x47.gif'
|
||||
extra_css = """ h1{font-family: Georgia,serif}
|
||||
h2{color: #626262}
|
||||
masthead_url = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
|
||||
extra_css = """
|
||||
h1{font-family: Georgia,serif}
|
||||
h2{color: #626262; font-weight: normal; font-size: 1.1em}
|
||||
body{font-family: Arial,sans-serif}
|
||||
img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
|
||||
.notaFecha{color: #808080}
|
||||
.notaFecha{color: #808080; font-size: small}
|
||||
.notaEpigrafe{font-size: x-small}
|
||||
.topNota h1{font-family: Arial,sans-serif}
|
||||
"""
|
||||
@ -37,47 +39,75 @@ class Lanacion(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['nota floatFix','topNota','nota','post']})]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']})
|
||||
,dict(name='div', attrs={'id':'content'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
||||
,dict(name='ul' , attrs={'class':['cajaHerramientas cajaTop noprint','herramientas noprint']})
|
||||
,dict(name='div' , attrs={'class':['cajaHerramientas noprint','cajaHerramientas floatFix'] })
|
||||
,dict(attrs={'class':['titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
|
||||
,dict(name='div' , attrs={'class':['titulosMultimedia','herramientas noprint','cajaHerramientas noprint','cajaHerramientas floatFix'] })
|
||||
,dict(attrs={'class':['izquierda','espacio17','espacio10','espacio20','floatFix ultimasNoticias','relacionadas','titulosMultimedia','derecha','techo color','encuesta','izquierda compartir','floatFix','videoCentro']})
|
||||
,dict(name=['iframe','embed','object','form','base','hr','meta','link','input'])
|
||||
]
|
||||
|
||||
remove_tags_after = dict(attrs={'class':['tags','nota-destacado']})
|
||||
remove_attributes = ['height','width','visible','onclick','data-count','name']
|
||||
|
||||
feeds = [
|
||||
(u'Ultimas noticias' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?origen=2' )
|
||||
,(u'Politica' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=30' )
|
||||
,(u'Economia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=272' )
|
||||
,(u'Deportes' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=131' )
|
||||
,(u'Informacion General' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=21' )
|
||||
,(u'Cultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1' )
|
||||
,(u'Opinion' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=28' )
|
||||
,(u'Espectaculos' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=120' )
|
||||
,(u'Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7' )
|
||||
,(u'Ciencia&Salud' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=498' )
|
||||
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
|
||||
,(u'Enfoques' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=421' )
|
||||
,(u'Comercio Exterior' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=347' )
|
||||
,(u'Tecnologia' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=432' )
|
||||
,(u'Arquitectura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=366' )
|
||||
,(u'Turismo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=504' )
|
||||
,(u'Al volante' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=371' )
|
||||
,(u'El Campo' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=337' )
|
||||
,(u'Moda y Belleza' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1312' )
|
||||
,(u'Inmuebles Comerciales', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1363' )
|
||||
,(u'Countries' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=1348' )
|
||||
,(u'adnCultura' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6734' )
|
||||
,(u'The Wall Street Journal Americas', u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=6373' )
|
||||
,(u'Estilo de vida' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7353' )
|
||||
,(u'Management' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7380' )
|
||||
,(u'Bicentenario' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=7276' )
|
||||
(u'Politica' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30' )
|
||||
,(u'Deportes' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
|
||||
,(u'Economia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
|
||||
,(u'Informacion General' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21' )
|
||||
,(u'Cultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1' )
|
||||
,(u'Opinion' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=28' )
|
||||
,(u'Espectaculos' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=120' )
|
||||
,(u'Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7' )
|
||||
,(u'Ciencia&Salud' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=498' )
|
||||
,(u'Revista' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=494' )
|
||||
,(u'Enfoques' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=421' )
|
||||
,(u'Comercio Exterior' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=347' )
|
||||
,(u'Tecnologia' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=432' )
|
||||
,(u'Arquitectura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=366' )
|
||||
,(u'Turismo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=504' )
|
||||
,(u'Al volante' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=371' )
|
||||
,(u'El Campo' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=337' )
|
||||
,(u'Moda y Belleza' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1312')
|
||||
,(u'Inmuebles Comerciales', u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1363')
|
||||
,(u'Countries' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1348')
|
||||
,(u'adnCultura' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6734')
|
||||
,(u'The WSJ Americas' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=6373')
|
||||
,(u'Comunidad' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=1344')
|
||||
,(u'Management' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7380')
|
||||
,(u'Bicentenario' , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=7276')
|
||||
]
|
||||
|
||||
|
||||
def get_article_url(self, article):
    """Return the URL to download for *article*, or None to leave it out.

    Links under 'http://blogs.lanacion' that do not end in '/' are opened
    through the recipe's browser and the final URL reported by the response
    is returned. Any other link containing 'galeria=' past its first
    character yields None; everything else is returned unchanged.
    """
    link = BasicNewsRecipe.get_article_url(self, article)
    is_unresolved_blog = link.startswith('http://blogs.lanacion') and not link.endswith('/')
    if is_unresolved_blog:
        response = self.browser.open_novisit(link)
        return response.geturl()
    return None if link.rfind('galeria=') > 0 else link
|
||||
|
||||
def preprocess_html(self, soup):
    """Strip inline styles, flatten links and give every image an alt text.

    The stale early ``return self.adeify_images(soup)`` that sat right after
    the style loop has been removed: it made the link and image clean-up
    below unreachable.

    :param soup: parsed article page; it is modified in place.
    :return: the same ``soup`` object after clean-up.
    """
    for item in soup.findAll(style=True):
        del item['style']
    for item in soup.findAll('a'):
        limg = item.find('img')
        if item.string is not None:
            # Link with a single text child: keep only the text.
            item.replaceWith(item.string)
        elif limg:
            # Link wrapping an image: keep the image, drop the link semantics.
            item.name = 'div'
            item.attrs = []
        else:
            # Any other link: collapse it to its text content.
            item.replaceWith(self.tag_to_string(item))
    for item in soup.findAll('img'):
        if not item.has_key('alt'):
            item['alt'] = 'image'
    return soup
|
||||
|
@ -1,7 +1,20 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2011, Eddie Lau'
|
||||
|
||||
# Users of Kindle 3 (with limited system-level CJK support)
|
||||
# please replace the following "True" with "False".
|
||||
__MakePeriodical__ = True
|
||||
# Turn it to True if your device supports display of CJK titles
|
||||
__UseChineseTitle__ = False
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||
2011/02/28: rearrange the sections
|
||||
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
|
||||
View] make it the same title if generating a periodical, so past issue will be automatically put into "Past Issues"
|
||||
folder in Kindle 3
|
||||
2011/02/20: skip duplicated links in finance section, put photos which may extend a whole page to the back of the articles
|
||||
clean up the indentation
|
||||
2010/12/07: add entertainment section, use newspaper front page as ebook cover, suppress date display in section list
|
||||
@ -19,55 +32,58 @@ import os, datetime, re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
class MPHKRecipe(BasicNewsRecipe):
|
||||
IsCJKWellSupported = True # Set to False to avoid generating periodical in which CJK characters can't be displayed in section/article view
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
description = ('Hong Kong Chinese Newspaper (http://news.mingpao.com). If'
|
||||
'you are using a Kindle with firmware < 3.1, customize the'
|
||||
'recipe')
|
||||
publisher = 'MingPao'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
publisher = 'MingPao'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
dict(attrs={'id':['newscontent']}), # entertainment page content
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['photo']})
|
||||
]
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']})] # for the finance page
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page
|
||||
dict(name='table')] # for content fetched from life.mingpao.com
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '<h1>'),
|
||||
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '</h1>'),
|
||||
(re.compile(r'<p><a href=.+?</a></p>', re.DOTALL|re.IGNORECASE), # for entertainment page
|
||||
lambda match: '')
|
||||
lambda match: ''),
|
||||
# skip <br> after title in life.mingpao.com fetched article
|
||||
(re.compile(r"<div id='newscontent'><br>", re.DOTALL|re.IGNORECASE),
|
||||
lambda match: "<div id='newscontent'>"),
|
||||
(re.compile(r"<br><br></b>", re.DOTALL|re.IGNORECASE),
|
||||
lambda match: "</b>")
|
||||
]
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurrence of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurrence of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
# minIdx = 10000
|
||||
# i0 = url.find('0')
|
||||
# if i0 >= 0 and i0 < minIdx:
|
||||
@ -99,253 +115,314 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
# i9 = url.find('9')
|
||||
# if i9 >= 0 and i9 < minIdx:
|
||||
# minIdx = i9
|
||||
return url
|
||||
return url
|
||||
|
||||
def get_dtlocal(self):
    """Return the reference datetime used to decide which issue to fetch.

    The value is the current UTC time moved forward by two hours. It is a
    naive ``datetime`` (no tzinfo attached).
    """
    # NOTE(review): the original comment called this "local hk time", but the
    # shift is +2h, not +8h. Presumably the date is meant to roll over at
    # about 06:00 HKT, when per that comment all news are available - confirm.
    return datetime.datetime.utcnow() + datetime.timedelta(hours=2)
|
||||
|
||||
def get_fetchdate(self):
    """Return the date of ``get_dtlocal()`` formatted as 'YYYYMMDD'."""
    reference = self.get_dtlocal()
    return reference.strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
    """Return the date of ``get_dtlocal()`` formatted as 'YYYY-MM-DD'."""
    reference = self.get_dtlocal()
    return reference.strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchday(self):
    """Return the two-digit day of month of ``get_dtlocal()``."""
    # The time shift itself is applied inside get_dtlocal(); only the
    # day-of-month field is extracted here.
    reference = self.get_dtlocal()
    return reference.strftime("%d")
|
||||
|
||||
def get_cover_url(self):
    """Return the URL of the dated cover image, or None if it cannot be opened.

    The URL is built from the fetch date and fetch day and then opened once
    with a browser; any error while opening it makes the method return None.
    """
    # Compute the date once so both occurrences in the URL always agree, even
    # if the clock crosses the date boundary between calls.
    fetch_date = self.get_fetchdate()
    cover = 'http://news.mingpao.com/' + fetch_date + '/' + fetch_date + '_' + self.get_fetchday() + 'gacov.jpg'
    # NOTE(review): called without an instance, exactly as in the original;
    # confirm this matches the get_browser signature of the targeted calibre.
    br = BasicNewsRecipe.get_browser()
    try:
        br.open(cover)
    except Exception:
        # Best effort: a missing cover must not abort the download, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        cover = None
    return cover
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
|
||||
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special- editorial
|
||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
if ed_articles:
|
||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
|
||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
|
||||
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
|
||||
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special - finance
|
||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
|
||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
# special - finance
|
||||
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
return feeds
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
def parse_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
divs.reverse()
|
||||
for i in divs:
|
||||
a = i.find('a', href = True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_fin_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href= True)
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||
if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||
title = self.tag_to_string(i)
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
return current_articles
|
||||
# special- columns
|
||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||
if col_articles:
|
||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||
|
||||
def parse_ent_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
return feeds
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(style=True):
|
||||
del item['width']
|
||||
for item in soup.findAll(stype=True):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
def parse_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
divs.reverse()
|
||||
for i in divs:
|
||||
a = i.find('a', href = True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if self.IsCJKWellSupported == True:
|
||||
# use Chinese title
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f) ' + self.get_fetchformatteddate()
|
||||
else:
|
||||
# use English title
|
||||
title = self.short_title() + ' ' + self.get_fetchformatteddate()
|
||||
if True: # force date in title
|
||||
# title += strftime(self.timefmt)
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if self.IsCJKWellSupported == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.timestamp = nowf()
|
||||
mi.timestamp = self.get_dtlocal()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.pubdate = nowf()
|
||||
mi.pubdate = self.get_dtlocal()
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
def parse_ed_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
def parse_fin_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href= True)
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
|
||||
title = self.tag_to_string(i)
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
return current_articles
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
def parse_ent_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
def parse_col_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(style=True):
|
||||
del item['width']
|
||||
for item in soup.findAll(stype=True):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
auth = a.author
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = a.text_summary
|
||||
if not desc:
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = self.short_title()
|
||||
# if not generating a periodical, force date to apply in title
|
||||
if __MakePeriodical__ == False:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
if True:
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.timestamp = nowf()
|
||||
mi.timestamp = self.get_dtlocal()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.pubdate = nowf()
|
||||
mi.pubdate = self.get_dtlocal()
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
auth = a.author
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = a.text_summary
|
||||
if not desc:
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth, description=desc)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
relp = sp[len(prefix):]
|
||||
entries.append(relp.replace(os.sep, '/'))
|
||||
last = sp
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
relp = sp[len(prefix):]
|
||||
entries.append(relp.replace(os.sep, '/'))
|
||||
last = sp
|
||||
|
||||
if os.path.exists(last):
|
||||
with open(last, 'rb') as fi:
|
||||
src = fi.read().decode('utf-8')
|
||||
soup = BeautifulSoup(src)
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
if os.path.exists(last):
|
||||
with open(last, 'rb') as fi:
|
||||
src = fi.read().decode('utf-8')
|
||||
soup = BeautifulSoup(src)
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, self.publisher, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
with open(last, 'wb') as fi:
|
||||
fi.write(unicode(soup).encode('utf-8'))
|
||||
if len(feeds) == 0:
|
||||
raise Exception('All feeds are empty, aborting.')
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
with open(last, 'wb') as fi:
|
||||
fi.write(unicode(soup).encode('utf-8'))
|
||||
if len(feeds) == 0:
|
||||
raise Exception('All feeds are empty, aborting.')
|
||||
|
||||
if len(feeds) > 1:
|
||||
for i, f in enumerate(feeds):
|
||||
entries.append('feed_%d/index.html'%i)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
auth = getattr(f, 'author', None)
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = getattr(f, 'description', None)
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
if len(feeds) > 1:
|
||||
for i, f in enumerate(feeds):
|
||||
entries.append('feed_%d/index.html'%i)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
auth = getattr(f, 'author', None)
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = getattr(f, 'description', None)
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
feed_index(0, toc)
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
feed_index(0, toc)
|
||||
|
||||
for i, p in enumerate(entries):
|
||||
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||
opf.create_spine(entries)
|
||||
opf.set_toc(toc)
|
||||
for i, p in enumerate(entries):
|
||||
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||
opf.create_spine(entries)
|
||||
opf.set_toc(toc)
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
66
resources/recipes/monden.recipe
Normal file
@ -0,0 +1,66 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
monden.info
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Monden(BasicNewsRecipe):
    # Recipe for monden.info, driven by the site's single RSS feed.

    # --- identity / catalogue metadata -----------------------------------
    title = u'Monden'
    publisher = u'Monden'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Arti\u015fti, interviuri, concerte.. MUZIC\u0102'
    category = 'Ziare,Stiri,Muzica'
    language = 'ro'
    cover_url = 'http://www.monden.info/wp-content/uploads/2009/04/mondeninfo-logo.jpg'

    # --- fetch settings --------------------------------------------------
    encoding = 'utf-8'
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # The metadata declared above is handed on unchanged as conversion
    # options, so it must be defined before this point.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Stylesheet text attached to the recipe as ``extra_css``.
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # --- tag selectors (name/attrs specs) --------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content'}},
    ]

    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['postAuthor']}},
        {'name': 'div', 'attrs': {'class': ['postLike']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['postLike']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.monden.info/feed/'),
    ]

    def preprocess_html(self, soup):
        """Return the result of running ``self.adeify_images`` on *soup*."""
        adeified = self.adeify_images(soup)
        return adeified
|
@ -14,7 +14,7 @@ class NationalGeoRo(BasicNewsRecipe):
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'S\u0103 avem grij\u0103 de planet\u0103'
|
||||
publisher = 'National Geographic'
|
||||
oldest_article = 5
|
||||
oldest_article = 35
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
33
resources/recipes/nbonline.recipe
Normal file
@ -0,0 +1,33 @@
|
||||
EMAILADDRESS = 'hoge@foobar.co.jp'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class NBOnline(BasicNewsRecipe):
    # Recipe for Nikkei Business Online. The site login needs an e-mail
    # address in addition to the user id / password pair; it is taken from
    # the module-level EMAILADDRESS constant, which the user must edit.

    title = u'Nikkei Business Online'
    language = 'ja'
    description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
    __author__ = 'Ado Nishimura'
    needs_subscription = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_tags_before = dict(id='kanban')
    remove_tags = [dict(name='div', id='footer')]

    # (section title, feed URL) pairs. The title previously read
    # "Buisiness"; it now matches the spelling used in ``title``.
    feeds = [('Nikkei Business Online', 'http://business.nikkeibp.co.jp/rss/all_nbo.rdf')]

    def get_cover_url(self):
        """Return the fixed URL of the site logo used as the cover."""
        return 'http://business.nikkeibp.co.jp/images/nbo/200804/parts/logo.gif'

    def get_browser(self):
        """Return a browser, logged in when credentials are available.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are set; otherwise the plain browser is returned.
        """
        # NOTE(review): called without an instance, so this presumably relies
        # on get_browser being a classmethod in this calibre version - confirm.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
            br.select_form(name='loginActionForm')
            # The form takes three fields: e-mail, user id and password.
            br['email'] = EMAILADDRESS
            br['userId'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def print_version(self, url):
        """Return *url* with the ``?ST=print`` query string appended."""
        return url + '?ST=print'
|
@ -1,14 +1,14 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf-8 -*-
|
||||
#Based on Lars Jacob's Taz Digiabo recipe
|
||||
#Based on veezh's original recipe and Kovid Goyal's New York Times recipe
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, veezh'
|
||||
__copyright__ = '2011, Snaab'
|
||||
|
||||
'''
|
||||
www.nrc.nl
|
||||
'''
|
||||
import os, urllib2, zipfile
|
||||
import os, zipfile
|
||||
import time
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -17,41 +17,59 @@ from calibre.ptempfile import PersistentTemporaryFile
|
||||
class NRCHandelsblad(BasicNewsRecipe):
|
||||
|
||||
title = u'NRC Handelsblad'
|
||||
description = u'De EPUB-versie van NRC'
|
||||
description = u'De ePaper-versie van NRC'
|
||||
language = 'nl'
|
||||
lang = 'nl-NL'
|
||||
needs_subscription = True
|
||||
|
||||
__author__ = 'veezh'
|
||||
__author__ = 'Snaab'
|
||||
|
||||
conversion_options = {
|
||||
'no_default_epub_cover' : True
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://login.nrc.nl/login')
|
||||
br.select_form(nr=0)
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def build_index(self):
|
||||
|
||||
today = time.strftime("%Y%m%d")
|
||||
|
||||
domain = "http://digitaleeditie.nrc.nl"
|
||||
|
||||
url = domain + "/digitaleeditie/helekrant/epub/nrc_" + today + ".epub"
|
||||
# print url
|
||||
#print url
|
||||
|
||||
try:
|
||||
f = urllib2.urlopen(url)
|
||||
except urllib2.HTTPError:
|
||||
br = self.get_browser()
|
||||
f = br.open(url)
|
||||
except:
|
||||
self.report_progress(0,_('Kan niet inloggen om editie te downloaden'))
|
||||
raise ValueError('Krant van vandaag nog niet beschikbaar')
|
||||
|
||||
|
||||
tmp = PersistentTemporaryFile(suffix='.epub')
|
||||
self.report_progress(0,_('downloading epub'))
|
||||
tmp.write(f.read())
|
||||
tmp.close()
|
||||
|
||||
zfile = zipfile.ZipFile(tmp.name, 'r')
|
||||
self.report_progress(0,_('extracting epub'))
|
||||
|
||||
zfile.extractall(self.output_dir)
|
||||
f.close()
|
||||
br.close()
|
||||
if zipfile.is_zipfile(tmp):
|
||||
try:
|
||||
zfile = zipfile.ZipFile(tmp.name, 'r')
|
||||
zfile.extractall(self.output_dir)
|
||||
self.report_progress(0,_('extracting epub'))
|
||||
except zipfile.BadZipfile:
|
||||
self.report_progress(0,_('BadZip error, continuing'))
|
||||
|
||||
tmp.close()
|
||||
index = os.path.join(self.output_dir, 'content.opf')
|
||||
index = os.path.join(self.output_dir, 'metadata.opf')
|
||||
|
||||
self.report_progress(1,_('epub downloaded and extracted'))
|
||||
|
||||
|
23
resources/recipes/oakland_north.recipe
Normal file
@ -0,0 +1,23 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class AdvancedUserRecipe1299640653(BasicNewsRecipe):
    # Recipe for Oakland North (oaklandnorth.net), one RSS feed.

    # --- metadata --------------------------------------------------------
    title = u'Oakland North'
    description = 'Oakland North'
    __author__ = 'noah'
    language = 'en'
    category = 'news'
    masthead_url = 'http://oaklandnorth.net/wp-content/themes/oaklandnorth/images/masthead.png'

    # --- fetch settings --------------------------------------------------
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True

    # --- tag selectors ---------------------------------------------------
    # The class pattern matches the whole word "post" (case-insensitive)
    # unless it is immediately followed by a hyphen, so "post-postscript"
    # does not match through its leading "post".
    keep_only_tags = [
        {
            'name': 'div',
            'attrs': {'class': re.compile(r'\bpost\b(?!-)', re.IGNORECASE)},
        },
    ]

    # The same <p class="post-postscript"> spec is listed both as the
    # "after" marker and as a tag to remove.
    remove_tags_after = [
        {'name': 'p', 'attrs': {'class': 'post-postscript'}},
    ]
    remove_tags = [
        {'name': 'p', 'attrs': {'class': 'post-postscript'}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'All Headlines', u'http://oaklandnorth.net/feed/'),
    ]
|
72
resources/recipes/onemagazine.recipe
Normal file
@ -0,0 +1,72 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
onemagazine.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Onemagazine(BasicNewsRecipe):
    # Recipe for onemagazine.ro ("The ONE"), driven by the site's RSS feed.

    # --- identity / catalogue metadata -----------------------------------
    title = u'The ONE'
    publisher = u'The ONE'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Be the ONE, not anyone ..'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://www.onemagazine.ro/images/logo_rss.jpg'

    # --- fetch settings --------------------------------------------------
    encoding = 'utf-8'
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # The metadata declared above is handed on unchanged as conversion
    # options, so it must be defined before this point.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Stylesheet text attached to the recipe as ``extra_css``.
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # --- tag selectors (name/attrs specs) --------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article'}},
        {'name': 'div', 'attrs': {'class': 'gallery clearfix'}},
        {'name': 'div', 'attrs': {'align': 'justify'}},
    ]

    remove_tags = [
        {'name': 'p', 'attrs': {'class': ['info']}},
        {'name': 'table', 'attrs': {'class': ['connect_widget_interactive_area']}},
        {'name': 'span', 'attrs': {'class': ['photo']}},
        {'name': 'div', 'attrs': {'class': ['counter']}},
        {'name': 'div', 'attrs': {'class': ['carousel']}},
        {'name': 'div', 'attrs': {'class': ['jcarousel-container jcarousel-container-horizontal']}},
    ]

    remove_tags_after = [
        {'name': 'table', 'attrs': {'class': ['connect_widget_interactive_area']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.onemagazine.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return the result of running ``self.adeify_images`` on *soup*."""
        adeified = self.adeify_images(soup)
        return adeified
|
67
resources/recipes/pcworldro.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
pcworld.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Pcworld(BasicNewsRecipe):
    # Recipe for pcworld.ro, driven by the site's RSS feed.

    # --- identity / catalogue metadata -----------------------------------
    title = u'PC World'
    publisher = u'PC World'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'IT'
    category = 'Ziare,Stiri,IT'
    language = 'ro'
    cover_url = 'http://www.pcworld.ro/img/ui/header-logo.gif'

    # --- fetch settings --------------------------------------------------
    encoding = 'utf-8'
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # The metadata declared above is handed on unchanged as conversion
    # options, so it must be defined before this point.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    # Stylesheet text attached to the recipe as ``extra_css``.
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # --- tag selectors (name/attrs specs) --------------------------------
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'content_page'}},
        {'name': 'div', 'attrs': {'class': 'box_center content_body'}},
    ]

    remove_tags = [
        {'name': 'h3', 'attrs': {'class': ['breadcrumb']}},
        {'name': 'div', 'attrs': {'class': ['box_center voteaza']}},
    ]

    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['box_center voteaza']}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Feeds', u'http://www.pcworld.ro/contents/pcworld.rss'),
    ]

    def preprocess_html(self, soup):
        """Return the result of running ``self.adeify_images`` on *soup*."""
        adeified = self.adeify_images(soup)
        return adeified
|
70
resources/recipes/promotor.recipe
Normal file
@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
promotor.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Promotor(BasicNewsRecipe):
    """Recipe for promotor.ro; articles are read from the site's RSS feed."""

    # --- identity / metadata -------------------------------------------
    title = u'Promotor'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Promotor'
    description = u'Auto-moto'
    category = 'Ziare,Reviste,TV,Auto'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.promotor.ro/images/logo_promotor.gif'

    # --- download behaviour --------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Replacement styling (the site's own stylesheets are disabled above).
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Tag specifications selecting the article title, lead image and body.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'casetatitluarticol'}},
        {'name': 'div', 'attrs': {'style': 'width: 273px; height: 210px; overflow: hidden; margin: 0pt auto;'}},
        {'name': 'div', 'attrs': {'class': 'textb'}},
        {'name': 'div', 'attrs': {'class': 'contentarticol'}},
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        {'name': 'td', 'attrs': {'class': ['connect_widget_vertical_center connect_widget_button_cell']}},
        {'name': 'div', 'attrs': {'class': ['etichetagry']}},
        {'name': 'span', 'attrs': {'class': ['textb']}},
    ]

    # Markers after which the rest of the page is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['etichetagry']}},
        {'name': 'span', 'attrs': {'class': ['textb']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.promotor.ro/rss'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
71
resources/recipes/protvmagazin.recipe
Normal file
@ -0,0 +1,71 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
protvmagazin.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Protvmagazin(BasicNewsRecipe):
    """Recipe for protvmagazin.ro; articles are read from the site's RSS feed."""

    # --- identity / metadata -------------------------------------------
    title = u'ProTv Magazin'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Ghid TV'
    publisher = u'ProTv Magazin'
    category = 'Ziare,Reviste,TV'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.protvmagazin.ro/images/logo.png'

    # --- download behaviour --------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Replacement styling (the site's own stylesheets are disabled above).
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Tag specification selecting the article container.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'box gradient'}),
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        dict(name='p', attrs={'class': ['title']}),
        dict(name='div', attrs={'id': ['online_only']}),
        dict(name='div', attrs={'class': ['show_article_rating']}),
        dict(name='ul', attrs={'class': ['breadcrumbs']}),
        dict(name='p', attrs={'class': ['tags']}),
    ]

    # Markers after which the rest of the page is discarded.
    remove_tags_after = [
        dict(name='table', attrs={'class': ['connect_widget_interactive_area']}),
        dict(name='p', attrs={'class': ['tags']}),
        # The tag name used to be 'dev', which is not an HTML element and so
        # could never match; 'div' is the element that carries this class.
        dict(name='div', attrs={'class': ['connect_widget_sample_connections clearfix']}),
    ]

    feeds = [
        (u'Feeds', u'http://www.protvmagazin.ro/rss/articole-noi'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        return self.adeify_images(soup)
|
59
resources/recipes/psychologies.recipe
Normal file
@ -0,0 +1,59 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
psychologies.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Psychologies(BasicNewsRecipe):
    """Recipe for psychologies.ro; articles are read from a FeedBurner feed."""

    # --- identity / metadata -------------------------------------------
    title = u'Psychologies'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Psychologies'
    description = u'Psihologie \u015fi Dezvoltare Personal\u0103..'
    category = 'Ziare,Reviste,Psihologie'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.psychologies.ro/images/default/logo.gif'

    # --- download behaviour --------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Replacement styling (the site's own stylesheets are disabled above).
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Tag specifications for the two containers kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'nav'}},
        {'name': 'div', 'attrs': {'id': 'textarticol'}},
    ]

    feeds = [
        (u'Feeds', u'http://feeds.feedburner.com/Psychologies'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
54
resources/recipes/publika.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
publika.md
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Publika(BasicNewsRecipe):
    """Recipe for publika.md; articles are read from the site's RSS feed."""

    # --- identity / metadata -------------------------------------------
    title = u'Publika'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Publika'
    description = u'\u015etiri din Moldova'
    category = 'Ziare,Stiri,Moldova'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://assets.publika.md/images/logo.jpg'

    # --- download behaviour --------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specification for the single container kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'colLeft'}},
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['articleInfo']}},
        {'name': 'div', 'attrs': {'class': ['articleRelated']}},
        {'name': 'div', 'attrs': {'class': ['roundedBox socialSharing']}},
        {'name': 'div', 'attrs': {'class': ['comment clearfix']}},
    ]

    # Markers after which the rest of the page is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['roundedBox socialSharing']}},
        {'name': 'div', 'attrs': {'class': ['comment clearfix']}},
    ]

    feeds = [
        (u'Feeds', u'http://rss.publika.md/stiri.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
56
resources/recipes/sltrib.py
Normal file
@ -0,0 +1,56 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1278347258(BasicNewsRecipe):
    """Recipe for the Salt Lake City Tribune (sltrib.com).

    Articles come from the site's RSS feeds and are fetched through the
    printer-friendly page; the cover is looked up on newseum.org.
    """

    title = u'Salt Lake City Tribune'
    __author__ = 'Charles Holbert'
    oldest_article = 7
    max_articles_per_feed = 100

    description = '''Utah's independent news source since 1871'''
    publisher = 'http://www.sltrib.com/'
    category = 'news, Utah, SLC'
    language = 'en'
    encoding = 'utf-8'
    #delay = 1
    #simultaneous_downloads = 1
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True

    #masthead_url = 'http://www.sltrib.com/csp/cms/sites/sltrib/assets/images/logo_main.png'
    #cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg9/lg/UT_SLT.jpg'

    # Tag specifications for the parts of the printer-friendly page to keep.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'imageBox'}),
        dict(name='div', attrs={'class': 'headline'}),
        dict(name='div', attrs={'class': 'byline'}),
        dict(name='p', attrs={'class': 'TEXT_w_Indent'}),
    ]

    feeds = [
        (u'SL Tribune Today', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=All'),
        (u'Utah News', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=UtahNews'),
        (u'Business News', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=Money'),
        (u'Technology', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=Technology'),
        (u'Most Popular', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rsspopular.csp'),
        (u'Sports', u'http://www.sltrib.com/csp/cms/sites/sltrib/RSS/rss.csp?cat=Sports'),
    ]

    extra_css = '''
        .headline{font-family:Arial,Helvetica,sans-serif; font-size:xx-large; font-weight: bold; color:#0E5398;}
        .byline{font-family:Arial,Helvetica,sans-serif; color:#333333; font-size:xx-small;}
        .storytext{font-family:Arial,Helvetica,sans-serif; font-size:medium;}
    '''

    def print_version(self, url):
        """Build the printer-friendly URL for an article.

        The sixth '/'-separated segment of ``url`` is split on '-' and its
        first piece is appended as the ``id`` query value. An ``IndexError``
        propagates when ``url`` has fewer than six segments.
        """
        seg = url.split('/')
        x = seg[5].split('-')
        baseURL = 'http://www.sltrib.com/csp/cms/sites/sltrib/pages/printerfriendly.csp?id='
        s = baseURL + x[0]
        return s

    def get_cover_url(self):
        """Return the front-page image URL from newseum.org, or None.

        None is returned when the expected container div, its image, or the
        image's ``src`` attribute is missing, rather than raising.
        """
        href = 'http://www.newseum.org/todaysfrontpages/hr.asp?fpVname=UT_SLT&ref_pge=lst'
        soup = self.index_to_soup(href)
        div = soup.find('div', attrs={'class': 'tfpLrgView_container'})
        if div is None:
            return None
        # The container may be present without an <img>; subscripting the
        # missing tag used to raise TypeError.
        img = div.find('img')
        if img is None:
            return None
        # .get() yields None instead of KeyError when 'src' is absent.
        return img.get('src')
|
||||
|
@ -3,6 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1299054026(BasicNewsRecipe):
|
||||
title = u'Thai Post Daily'
|
||||
__author__ = 'Chotechai P.'
|
||||
language = 'th'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
cover_url = 'http://upload.wikimedia.org/wikipedia/th/1/10/ThaiPost_Logo.png'
|
||||
|
52
resources/recipes/timesnewroman.recipe
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
timesnewroman.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TimesNewRoman(BasicNewsRecipe):
    """Recipe for timesnewroman.ro; articles are read from the site's RSS feed."""

    # --- identity / metadata -------------------------------------------
    title = u'Times New Roman'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Times New Roman'
    description = u'Cotidian independent de umor voluntar'
    category = 'Ziare,Reviste,Fun'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.timesnewroman.ro/templates/TNRV2/images/logo.gif'

    # --- download behaviour --------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specification for the single container kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'page'}},
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        {'name': 'p', 'attrs': {'class': ['articleinfo']}},
        {'name': 'div', 'attrs': {'class': ['vergefacebooklike']}},
        {'name': 'div', 'attrs': {'class': 'cleared'}},
    ]

    # Marker after which the rest of the page is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'cleared'}},
    ]

    feeds = [
        (u'Feeds', u'http://www.timesnewroman.ro/index.php?format=feed&type=rss'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
51
resources/recipes/trombon.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
trombon.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Trombon(BasicNewsRecipe):
    """Recipe for trombon.ro; articles are read from a FeedBurner feed."""

    # --- identity / metadata -------------------------------------------
    title = u'Trombon'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Trombon'
    description = u'Parodii si Pamflete'
    category = 'Ziare,Reviste,Fun'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.trombon.ro/i/trombon.gif'

    # --- download behaviour --------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specification for the single container kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articol'}},
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['info_2']}},
        {'name': 'iframe', 'attrs': {'scrolling': ['no']}},
    ]

    # Marker after which the rest of the page is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'id': 'article_vote'}},
    ]

    feeds = [
        (u'Feeds', u'http://feeds.feedburner.com/trombon/ABWb?format=xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
72
resources/recipes/tvmania.recipe
Normal file
@ -0,0 +1,72 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
tvmania.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Tvmania(BasicNewsRecipe):
    """Recipe for tvmania.ro; articles are read from the site's feed."""

    # --- identity / metadata -------------------------------------------
    title = u'TVmania'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'TVmania'
    description = u'Programe TV'
    category = 'Ziare,Reviste,TV'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.tvmania.ro/wp-content/themes/tvmania/images/logo.png'

    # --- download behaviour --------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Replacement styling (the site's own stylesheets are disabled above).
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Tag specifications for the containers kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articol'}},
        {'name': 'font', 'attrs': {'class': 'mic'}},
        {'name': 'div', 'attrs': {'id': 'header_recomandari'}},
        {'name': 'div', 'attrs': {'class': 'main-image'}},
        {'name': 'div', 'attrs': {'id': 'articol_recomandare'}},
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['iLikeThis']}},
        {'name': 'span', 'attrs': {'class': ['tag-links']}},
    ]

    # Markers after which the rest of the page is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['iLikeThis']}},
        {'name': 'span', 'attrs': {'class': ['tag-links']}},
    ]

    feeds = [
        (u'Feeds', u'http://www.tvmania.ro/feed'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
75
resources/recipes/viva.recipe
Normal file
@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
viva.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Viva(BasicNewsRecipe):
    """Recipe for viva.ro; articles are read from four FeedBurner feeds."""

    # --- identity / metadata -------------------------------------------
    title = u'Viva'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = u'Viva'
    description = u'Vedete si evenimente'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://www.viva.ro/images/default/viva.gif'

    # --- download behaviour --------------------------------------------
    oldest_article = 25
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Replacement styling (the site's own stylesheets are disabled above).
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Tag specifications for the containers kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'articol'}},
        {'name': 'div', 'attrs': {'class': 'gallery clearfix'}},
        {'name': 'div', 'attrs': {'align': 'justify'}},
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['breadcrumbs']}},
        {'name': 'div', 'attrs': {'class': ['links clearfix']}},
        {'name': 'a', 'attrs': {'id': ['img_arrow_right']}},
        {'name': 'img', 'attrs': {'id': ['zoom']}},
        {'name': 'div', 'attrs': {'class': ['foto_counter']}},
        {'name': 'div', 'attrs': {'class': ['gal_select clearfix']}},
    ]

    # Marker after which the rest of the page is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['links clearfix']}},
    ]

    feeds = [
        (u'Vedete', u'http://feeds.feedburner.com/viva-Vedete'),
        (u'Evenimente', u'http://feeds.feedburner.com/viva-Evenimente'),
        (u'Frumusete', u'http://feeds.feedburner.com/viva-Beauty-Fashion'),
        (u'Noutati', u'http://feeds.feedburner.com/viva-Noutati'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
54
resources/recipes/wallstreetro.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
wall-street.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class WallStreetRo(BasicNewsRecipe):
    """Recipe for wall-street.ro; articles are read from the site's RSS feed."""

    # --- identity / metadata -------------------------------------------
    title = u'Wall Street'
    __author__ = u'Silviu Cotoar\u0103'
    publisher = 'Wall Street'
    description = ''
    category = 'Ziare'
    language = 'ro'
    encoding = 'utf-8'
    cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'

    # --- download behaviour --------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    # Metadata forwarded to the conversion step; built from the values above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Tag specifications for the article header and body containers.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article_header'}},
        {'name': 'div', 'attrs': {'class': 'article_text'}},
    ]

    # Tag specifications for elements dropped from the kept content.
    remove_tags = [
        {'name': 'p', 'attrs': {'class': ['page_breadcrumbs']}},
        {'name': 'div', 'attrs': {'id': ['article_user_toolbox']}},
        {'name': 'p', 'attrs': {'class': ['comments_count_container']}},
        {'name': 'div', 'attrs': {'class': ['article_left_column']}},
    ]

    # Marker after which the rest of the page is discarded.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'clearfloat'}},
    ]

    feeds = [
        (u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml'),
    ]

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``."""
        adjusted = self.adeify_images(soup)
        return adjusted
|
@ -16,6 +16,7 @@
|
||||
"template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n",
|
||||
"print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n",
|
||||
"titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n",
|
||||
"subitems": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n items = [v.strip() for v in val.split(',')]\n rv = set()\n for item in items:\n component = item.split('.')\n try:\n if ei == 0:\n rv.add('.'.join(component[si:]))\n else:\n rv.add('.'.join(component[si:ei]))\n except:\n pass\n return ', '.join(sorted(rv, key=sort_key))\n",
|
||||
"sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n val = val.split(sep)\n try:\n if ei == 0:\n return sep.join(val[si:])\n else:\n return sep.join(val[si:ei])\n except:\n return ''\n",
|
||||
"test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n",
|
||||
"eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n",
|
||||
|
@ -4,6 +4,7 @@
|
||||
# #
|
||||
# #
|
||||
# copyright 2002 Paul Henry Tremblay #
|
||||
# Copyright 2011 Kovid Goyal
|
||||
# #
|
||||
# This program is distributed in the hope that it will be useful, #
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
||||
@ -19,21 +20,21 @@
|
||||
#########################################################################
|
||||
|
||||
-->
|
||||
<xsl:output method="xml" encoding="UTF-8"/>
|
||||
<xsl:key name="note-link" match="fb:section" use="@id"/>
|
||||
<xsl:template match="/*">
|
||||
<html>
|
||||
<head>
|
||||
<xsl:if test="fb:description/fb:title-info/fb:lang = 'ru'">
|
||||
<meta HTTP-EQUIV="content-type" CONTENT="text/html; charset=UTF-8"/>
|
||||
</xsl:if>
|
||||
<title>
|
||||
<xsl:value-of select="fb:description/fb:title-info/fb:book-title"/>
|
||||
</title>
|
||||
<style type="text/css">
|
||||
<xsl:output method="xml" encoding="UTF-8"/>
|
||||
<xsl:key name="note-link" match="fb:section" use="@id"/>
|
||||
<xsl:template match="/*">
|
||||
<html>
|
||||
<head>
|
||||
<xsl:if test="fb:description/fb:title-info/fb:lang = 'ru'">
|
||||
<meta HTTP-EQUIV="content-type" CONTENT="text/html; charset=UTF-8"/>
|
||||
</xsl:if>
|
||||
<title>
|
||||
<xsl:value-of select="fb:description/fb:title-info/fb:book-title"/>
|
||||
</title>
|
||||
<style type="text/css">
|
||||
a { color : #0002CC }
|
||||
|
||||
a:hover { color : #BF0000 }
|
||||
a:hover { color : #BF0000 }
|
||||
|
||||
body { background-color : #FEFEFE; color : #000000; font-family : Verdana, Geneva, Arial, Helvetica, sans-serif; text-align : justify }
|
||||
|
||||
@ -62,90 +63,90 @@
|
||||
.epigraph{width:50%; margin-left : 35%;}
|
||||
|
||||
div.paragraph { text-align: justify; text-indent: 2em; }
|
||||
</style>
|
||||
</style>
|
||||
<link rel="stylesheet" type="text/css" href="inline-styles.css" />
|
||||
</head>
|
||||
<body>
|
||||
<xsl:for-each select="fb:description/fb:title-info/fb:annotation">
|
||||
<div>
|
||||
<xsl:call-template name="annotation"/>
|
||||
</div>
|
||||
<hr/>
|
||||
</xsl:for-each>
|
||||
<!-- BUILD TOC -->
|
||||
<ul>
|
||||
<xsl:apply-templates select="fb:body" mode="toc"/>
|
||||
</ul>
|
||||
<hr/>
|
||||
</head>
|
||||
<body>
|
||||
<xsl:for-each select="fb:description/fb:title-info/fb:annotation">
|
||||
<div>
|
||||
<xsl:call-template name="annotation"/>
|
||||
</div>
|
||||
<hr/>
|
||||
</xsl:for-each>
|
||||
<!-- BUILD TOC -->
|
||||
<ul>
|
||||
<xsl:apply-templates select="fb:body" mode="toc"/>
|
||||
</ul>
|
||||
<hr/>
|
||||
<!-- END BUILD TOC -->
|
||||
<!-- BUILD BOOK -->
|
||||
<xsl:for-each select="fb:body">
|
||||
<xsl:if test="position()!=1">
|
||||
<hr/>
|
||||
</xsl:if>
|
||||
<xsl:if test="@name">
|
||||
<h4 align="center">
|
||||
<xsl:value-of select="@name"/>
|
||||
</h4>
|
||||
</xsl:if>
|
||||
<!-- <xsl:apply-templates /> -->
|
||||
<xsl:apply-templates/>
|
||||
</xsl:for-each>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
<!-- author template -->
|
||||
<xsl:template name="author">
|
||||
<xsl:value-of select="fb:first-name"/>
|
||||
<xsl:text disable-output-escaping="no"> </xsl:text>
|
||||
<xsl:value-of select="fb:middle-name"/> 
|
||||
<!-- BUILD BOOK -->
|
||||
<xsl:for-each select="fb:body">
|
||||
<xsl:if test="position()!=1">
|
||||
<hr/>
|
||||
</xsl:if>
|
||||
<xsl:if test="@name">
|
||||
<h4 align="center">
|
||||
<xsl:value-of select="@name"/>
|
||||
</h4>
|
||||
</xsl:if>
|
||||
<!-- <xsl:apply-templates /> -->
|
||||
<xsl:apply-templates/>
|
||||
</xsl:for-each>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
<!-- author template -->
|
||||
<xsl:template name="author">
|
||||
<xsl:value-of select="fb:first-name"/>
|
||||
<xsl:text disable-output-escaping="no"> </xsl:text>
|
||||
<xsl:value-of select="fb:middle-name"/> 
|
||||
<xsl:text disable-output-escaping="no"> </xsl:text>
|
||||
<xsl:value-of select="fb:last-name"/>
|
||||
<br/>
|
||||
</xsl:template>
|
||||
<!-- sequence template -->
|
||||
<xsl:template name="sequence">
|
||||
<LI/>
|
||||
<xsl:value-of select="@name"/>
|
||||
<xsl:if test="@number">
|
||||
<xsl:text disable-output-escaping="no">, #</xsl:text>
|
||||
<xsl:value-of select="@number"/>
|
||||
</xsl:if>
|
||||
<xsl:if test="fb:sequence">
|
||||
<ul>
|
||||
<xsl:for-each select="fb:sequence">
|
||||
<xsl:call-template name="sequence"/>
|
||||
</xsl:for-each>
|
||||
</ul>
|
||||
</xsl:if>
|
||||
<!-- <br/> -->
|
||||
</xsl:template>
|
||||
<!-- toc template -->
|
||||
<xsl:template match="fb:section|fb:body" mode="toc">
|
||||
<xsl:choose>
|
||||
<xsl:when test="name()='body' and position()=1 and not(fb:title)">
|
||||
<xsl:apply-templates select="fb:section" mode="toc"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<li>
|
||||
<a href="#TOC_{generate-id()}"><xsl:value-of select="normalize-space(fb:title/fb:p[1] | @name)"/></a>
|
||||
<xsl:if test="fb:section">
|
||||
<ul><xsl:apply-templates select="fb:section" mode="toc"/></ul>
|
||||
</xsl:if>
|
||||
</li>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- description -->
|
||||
<xsl:template match="fb:description">
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
<!-- body -->
|
||||
<xsl:template match="fb:body">
|
||||
<div><xsl:apply-templates/></div>
|
||||
</xsl:template>
|
||||
<xsl:value-of select="fb:last-name"/>
|
||||
<br/>
|
||||
</xsl:template>
|
||||
<!-- sequence template -->
|
||||
<xsl:template name="sequence">
|
||||
<LI/>
|
||||
<xsl:value-of select="@name"/>
|
||||
<xsl:if test="@number">
|
||||
<xsl:text disable-output-escaping="no">, #</xsl:text>
|
||||
<xsl:value-of select="@number"/>
|
||||
</xsl:if>
|
||||
<xsl:if test="fb:sequence">
|
||||
<ul>
|
||||
<xsl:for-each select="fb:sequence">
|
||||
<xsl:call-template name="sequence"/>
|
||||
</xsl:for-each>
|
||||
</ul>
|
||||
</xsl:if>
|
||||
<!-- <br/> -->
|
||||
</xsl:template>
|
||||
<!-- toc template -->
|
||||
<xsl:template match="fb:section|fb:body" mode="toc">
|
||||
<xsl:choose>
|
||||
<xsl:when test="name()='body' and position()=1 and not(fb:title)">
|
||||
<xsl:apply-templates select="fb:section" mode="toc"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<li>
|
||||
<a href="#TOC_{generate-id()}"><xsl:value-of select="normalize-space(fb:title/fb:p[1] | @name)"/></a>
|
||||
<xsl:if test="fb:section">
|
||||
<ul><xsl:apply-templates select="fb:section" mode="toc"/></ul>
|
||||
</xsl:if>
|
||||
</li>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- description -->
|
||||
<xsl:template match="fb:description">
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
<!-- body -->
|
||||
<xsl:template match="fb:body">
|
||||
<div><xsl:apply-templates/></div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="fb:section">
|
||||
<xsl:template match="fb:section">
|
||||
<xsl:variable name="section_has_title">
|
||||
<xsl:choose>
|
||||
<xsl:when test="./fb:title"><xsl:value-of select="generate-id()" /></xsl:when>
|
||||
@ -164,15 +165,15 @@
|
||||
<xsl:apply-templates>
|
||||
<xsl:with-param name="section_toc_id" select="$section_has_title" />
|
||||
</xsl:apply-templates>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- section/title -->
|
||||
<xsl:template match="fb:section/fb:title|fb:poem/fb:title">
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- section/title -->
|
||||
<xsl:template match="fb:section/fb:title|fb:poem/fb:title">
|
||||
<xsl:param name="section_toc_id" />
|
||||
<xsl:choose>
|
||||
<xsl:when test="count(ancestor::node()) < 9">
|
||||
<xsl:element name="{concat('h',count(ancestor::node())-3)}">
|
||||
<xsl:choose>
|
||||
<xsl:when test="count(ancestor::node()) < 9">
|
||||
<xsl:element name="{concat('h',count(ancestor::node())-3)}">
|
||||
<xsl:if test="../@id">
|
||||
<xsl:attribute name="id"><xsl:value-of select="../@id" /></xsl:attribute>
|
||||
</xsl:if>
|
||||
@ -181,79 +182,79 @@
|
||||
<xsl:attribute name="id">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<a name="TOC_{generate-id()}"></a>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:element name="h6">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- body/title -->
|
||||
<xsl:template match="fb:body/fb:title">
|
||||
<a name="TOC_{generate-id()}"></a>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:element name="h6">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- body/title -->
|
||||
<xsl:template match="fb:body/fb:title">
|
||||
<xsl:element name="h1">
|
||||
<xsl:apply-templates />
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="fb:title/fb:p">
|
||||
<xsl:apply-templates/><xsl:text disable-output-escaping="no"> </xsl:text><br/>
|
||||
</xsl:template>
|
||||
<!-- subtitle -->
|
||||
<xsl:template match="fb:subtitle">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<h5>
|
||||
<xsl:apply-templates/>
|
||||
</h5>
|
||||
</xsl:template>
|
||||
<!-- p -->
|
||||
<xsl:template match="fb:p">
|
||||
<xsl:template match="fb:title/fb:p">
|
||||
<xsl:apply-templates/><xsl:text disable-output-escaping="no"> </xsl:text><br/>
|
||||
</xsl:template>
|
||||
<!-- subtitle -->
|
||||
<xsl:template match="fb:subtitle">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<h5>
|
||||
<xsl:apply-templates/>
|
||||
</h5>
|
||||
</xsl:template>
|
||||
<!-- p -->
|
||||
<xsl:template match="fb:p">
|
||||
<xsl:element name="div">
|
||||
<xsl:attribute name="class">paragraph</xsl:attribute>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:if test="@style">
|
||||
<xsl:attribute name="style"><xsl:value-of select="@style"/></xsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
<!-- strong -->
|
||||
<xsl:template match="fb:strong">
|
||||
<b><xsl:apply-templates/></b>
|
||||
</xsl:template>
|
||||
<!-- emphasis -->
|
||||
<xsl:template match="fb:emphasis">
|
||||
<i> <xsl:apply-templates/></i>
|
||||
</xsl:template>
|
||||
<!-- style -->
|
||||
<xsl:template match="fb:style">
|
||||
<span class="{@name}"><xsl:apply-templates/></span>
|
||||
</xsl:template>
|
||||
<!-- empty-line -->
|
||||
<xsl:template match="fb:empty-line">
|
||||
<br/>
|
||||
</xsl:template>
|
||||
</xsl:template>
|
||||
<!-- strong -->
|
||||
<xsl:template match="fb:strong">
|
||||
<b><xsl:apply-templates/></b>
|
||||
</xsl:template>
|
||||
<!-- emphasis -->
|
||||
<xsl:template match="fb:emphasis">
|
||||
<i> <xsl:apply-templates/></i>
|
||||
</xsl:template>
|
||||
<!-- style -->
|
||||
<xsl:template match="fb:style">
|
||||
<span class="{@name}"><xsl:apply-templates/></span>
|
||||
</xsl:template>
|
||||
<!-- empty-line -->
|
||||
<xsl:template match="fb:empty-line">
|
||||
<br/>
|
||||
</xsl:template>
|
||||
<!-- super/sub-scripts -->
|
||||
<xsl:template match="fb:sup">
|
||||
<sup><xsl:apply-templates/></sup>
|
||||
@ -261,123 +262,140 @@
|
||||
<xsl:template match="fb:sub">
|
||||
<sub><xsl:apply-templates/></sub>
|
||||
</xsl:template>
|
||||
<!-- link -->
|
||||
<xsl:template match="fb:a">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="href"><xsl:value-of select="@xlink:href"/></xsl:attribute>
|
||||
<xsl:attribute name="title">
|
||||
<xsl:choose>
|
||||
<xsl:when test="starts-with(@xlink:href,'#')"><xsl:value-of select="key('note-link',substring-after(@xlink:href,'#'))/fb:p"/></xsl:when>
|
||||
<xsl:otherwise><xsl:value-of select="key('note-link',@xlink:href)/fb:p"/></xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:attribute>
|
||||
<xsl:choose>
|
||||
<xsl:when test="(@type) = 'note'">
|
||||
<sup>
|
||||
<xsl:apply-templates/>
|
||||
</sup>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
<!-- annotation -->
|
||||
<xsl:template name="annotation">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<h3>Annotation</h3>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
<!-- epigraph -->
|
||||
<xsl:template match="fb:epigraph">
|
||||
<blockquote class="epigraph">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
<!-- epigraph/text-author -->
|
||||
<xsl:template match="fb:epigraph/fb:text-author">
|
||||
<blockquote>
|
||||
<i><xsl:apply-templates/></i>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
<!-- cite -->
|
||||
<xsl:template match="fb:cite">
|
||||
<blockquote>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
<!-- cite/text-author -->
|
||||
<xsl:template match="fb:text-author">
|
||||
<blockquote>
|
||||
<i> <xsl:apply-templates/></i></blockquote>
|
||||
</xsl:template>
|
||||
<!-- date -->
|
||||
<xsl:template match="fb:date">
|
||||
<xsl:choose>
|
||||
<xsl:when test="not(@value)">
|
||||
   <xsl:apply-templates/>
|
||||
<br/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
   <xsl:value-of select="@value"/>
|
||||
<br/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- poem -->
|
||||
<xsl:template match="fb:poem">
|
||||
<blockquote>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
<!-- link -->
|
||||
<xsl:template match="fb:a">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="href"><xsl:value-of select="@xlink:href"/></xsl:attribute>
|
||||
<xsl:attribute name="title">
|
||||
<xsl:choose>
|
||||
<xsl:when test="starts-with(@xlink:href,'#')"><xsl:value-of select="key('note-link',substring-after(@xlink:href,'#'))/fb:p"/></xsl:when>
|
||||
<xsl:otherwise><xsl:value-of select="key('note-link',@xlink:href)/fb:p"/></xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:attribute>
|
||||
<xsl:choose>
|
||||
<xsl:when test="(@type) = 'note'">
|
||||
<sup>
|
||||
<xsl:apply-templates/>
|
||||
</sup>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
<!-- annotation -->
|
||||
<xsl:template name="annotation">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<h3>Annotation</h3>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
<!-- tables -->
|
||||
<!-- fb:table becomes a plain table element; its rows and cells are
     emitted by whichever templates match the child elements -->
<xsl:template match="fb:table">
    <table><xsl:apply-templates/></table>
</xsl:template>
|
||||
<!-- fb:tr becomes a tr element wrapping whatever its children produce -->
<xsl:template match="fb:tr">
    <tr>
        <xsl:apply-templates/>
    </tr>
</xsl:template>
|
||||
<!-- Table cells. Data cells (fb:td) and header cells (fb:th) are both emitted
     under their own local name, so header cells keep their cell wrapper
     instead of falling through to the built-in text-only rule.
     The layout attributes that have same-named (X)HTML equivalents
     (align, valign, colspan, rowspan) are copied when present; attributes
     missing from the source cell are simply not emitted, so a cell carrying
     only @align produces exactly the output it did before. -->
<xsl:template match="fb:td|fb:th">
    <xsl:element name="{local-name()}">
        <!-- attributes must be written before any child content -->
        <xsl:for-each select="@align | @valign | @colspan | @rowspan">
            <xsl:attribute name="{name()}"><xsl:value-of select="."/></xsl:attribute>
        </xsl:for-each>
        <xsl:apply-templates/>
    </xsl:element>
</xsl:template>
|
||||
<!-- epigraph -->
|
||||
<xsl:template match="fb:epigraph">
|
||||
<blockquote class="epigraph">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
<!-- epigraph/text-author -->
|
||||
<xsl:template match="fb:epigraph/fb:text-author">
|
||||
<blockquote>
|
||||
<i><xsl:apply-templates/></i>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
<!-- cite -->
|
||||
<xsl:template match="fb:cite">
|
||||
<blockquote>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
<!-- cite/text-author -->
|
||||
<xsl:template match="fb:text-author">
|
||||
<blockquote>
|
||||
<i> <xsl:apply-templates/></i></blockquote>
|
||||
</xsl:template>
|
||||
<!-- date -->
|
||||
<xsl:template match="fb:date">
|
||||
<xsl:choose>
|
||||
<xsl:when test="not(@value)">
|
||||
   <xsl:apply-templates/>
|
||||
<br/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
   <xsl:value-of select="@value"/>
|
||||
<br/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- poem -->
|
||||
<xsl:template match="fb:poem">
|
||||
<blockquote>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</blockquote>
|
||||
</xsl:template>
|
||||
|
||||
<!-- stanza -->
|
||||
<xsl:template match="fb:stanza">
|
||||
<xsl:apply-templates/>
|
||||
<br/>
|
||||
</xsl:template>
|
||||
<!-- v -->
|
||||
<xsl:template match="fb:v">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/><br/>
|
||||
</xsl:template>
|
||||
<!-- image -->
|
||||
<xsl:template match="fb:image">
|
||||
<div align="center">
|
||||
<img border="1">
|
||||
<xsl:choose>
|
||||
<xsl:when test="starts-with(@xlink:href,'#')">
|
||||
<xsl:attribute name="src"><xsl:value-of select="substring-after(@xlink:href,'#')"/></xsl:attribute>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:attribute name="src"><xsl:value-of select="@xlink:href"/></xsl:attribute>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</img>
|
||||
</div>
|
||||
</xsl:template>
|
||||
<!-- stanza -->
|
||||
<xsl:template match="fb:stanza">
|
||||
<xsl:apply-templates/>
|
||||
<br/>
|
||||
</xsl:template>
|
||||
<!-- v -->
|
||||
<xsl:template match="fb:v">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/><br/>
|
||||
</xsl:template>
|
||||
<!-- image -->
|
||||
<xsl:template match="fb:image">
|
||||
<div align="center">
|
||||
<img border="1">
|
||||
<xsl:choose>
|
||||
<xsl:when test="starts-with(@xlink:href,'#')">
|
||||
<xsl:attribute name="src"><xsl:value-of select="substring-after(@xlink:href,'#')"/></xsl:attribute>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:attribute name="src"><xsl:value-of select="@xlink:href"/></xsl:attribute>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</img>
|
||||
</div>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
||||
|
@ -63,8 +63,9 @@ def osx_version():
|
||||
if m:
|
||||
return int(m.group(1)), int(m.group(2)), int(m.group(3))
|
||||
|
||||
|
||||
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
|
||||
_filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
|
||||
u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
|
||||
|
||||
def sanitize_file_name(name, substitute='_', as_unicode=False):
|
||||
'''
|
||||
@ -85,8 +86,35 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
|
||||
one = one.decode(filesystem_encoding)
|
||||
one = one.replace('..', substitute)
|
||||
# Windows doesn't like path components that end with a period
|
||||
if one.endswith('.'):
|
||||
if one and one[-1] in ('.', ' '):
|
||||
one = one[:-1]+'_'
|
||||
# Names starting with a period are hidden on Unix
|
||||
if one.startswith('.'):
|
||||
one = '_' + one[1:]
|
||||
return one
|
||||
|
||||
def sanitize_file_name_unicode(name, substitute='_'):
    '''
    Return a cleaned up copy of the file name `name`.

    Every character found in `_filename_sanitize_unicode` is replaced by
    `substitute`, whitespace characters are turned into plain spaces and
    leading/trailing whitespace is stripped. A name made up only of periods
    becomes ``_``, every ``..`` is replaced by `substitute`, a trailing period
    or space is replaced by ``_`` and so is a leading period.

    Anything that is not a unicode object is handed to
    :func:`sanitize_file_name` with ``as_unicode=True`` instead.

    **WARNING:** Path separators are among the replaced characters, so pass
    only a single file name, never a full path.
    '''
    if not isinstance(name, unicode):
        return sanitize_file_name(name, substitute=substitute, as_unicode=True)

    cleaned = u''.join([substitute if ch in _filename_sanitize_unicode else ch
        for ch in name])
    # Collapse every kind of whitespace to a plain space, then trim the ends
    cleaned = re.sub(r'\s', ' ', cleaned)
    cleaned = cleaned.strip()
    # A name consisting solely of periods is replaced wholesale
    cleaned = re.sub(r'^\.+$', '_', cleaned)
    cleaned = cleaned.replace('..', substitute)

    # Windows doesn't like path components that end with a period or space
    if cleaned[-1:] in ('.', ' '):
        cleaned = cleaned[:-1] + '_'

    # Names starting with a period are hidden on Unix
    if cleaned[:1] == '.':
        cleaned = '_' + cleaned[1:]

    return cleaned
|
||||
|
||||
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.7.48'
|
||||
__version__ = '0.7.49'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
import re
|
||||
|
@ -92,7 +92,7 @@ class TXT2TXTZ(FileTypePlugin):
|
||||
'containing Markdown or Textile references to images. The referenced '
|
||||
'images as well as the TXT file are added to the archive.')
|
||||
version = numeric_version
|
||||
file_types = set(['txt'])
|
||||
file_types = set(['txt', 'text'])
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
on_import = True
|
||||
|
||||
|
@ -35,7 +35,7 @@ class ANDROID(USBMS):
|
||||
# Motorola
|
||||
0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
|
||||
0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
|
||||
0x4286 : [0x216], 0x42b3 : [0x216] },
|
||||
0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216] },
|
||||
|
||||
# Sony Ericsson
|
||||
0xfce : { 0xd12e : [0x0100]},
|
||||
@ -57,7 +57,7 @@ class ANDROID(USBMS):
|
||||
0x413c : { 0xb007 : [0x0100, 0x0224]},
|
||||
|
||||
# LG
|
||||
0x1004 : { 0x61cc : [0x100] },
|
||||
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] },
|
||||
|
||||
# Archos
|
||||
0x0e79 : {
|
||||
@ -78,6 +78,9 @@ class ANDROID(USBMS):
|
||||
# Xperia
|
||||
0x13d3 : { 0x3304 : [0x0001, 0x0002] },
|
||||
|
||||
# CREEL?? Also Nextbook
|
||||
0x5e3 : { 0x726 : [0x222] },
|
||||
|
||||
}
|
||||
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
|
||||
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of directories to '
|
||||
@ -93,7 +96,8 @@ class ANDROID(USBMS):
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
||||
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD', '7']
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
|
||||
'7', 'A956']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7']
|
||||
|
@ -224,7 +224,7 @@ class TREKSTOR(USBMS):
|
||||
FORMATS = ['epub', 'txt', 'pdf']
|
||||
|
||||
VENDOR_ID = [0x1e68]
|
||||
PRODUCT_ID = [0x0041]
|
||||
PRODUCT_ID = [0x0041, 0x0042]
|
||||
BCD = [0x0002]
|
||||
|
||||
EBOOK_DIR_MAIN = 'Ebooks'
|
||||
|
@ -213,7 +213,7 @@ def main():
|
||||
|
||||
for d in connected_devices:
|
||||
try:
|
||||
d.open()
|
||||
d.open(None)
|
||||
except:
|
||||
continue
|
||||
else:
|
||||
|
@ -25,7 +25,7 @@ class DRMError(ValueError):
|
||||
class ParserError(ValueError):
|
||||
pass
|
||||
|
||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'htm', 'xhtm',
|
||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
|
||||
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']
|
||||
|
@ -22,7 +22,7 @@ class CHMInput(InputFormatPlugin):
|
||||
def _chmtohtml(self, output_dir, chm_path, no_images, log):
|
||||
from calibre.ebooks.chm.reader import CHMReader
|
||||
log.debug('Opening CHM file')
|
||||
rdr = CHMReader(chm_path, log)
|
||||
rdr = CHMReader(chm_path, log, self.opts)
|
||||
log.debug('Extracting CHM to %s' % output_dir)
|
||||
rdr.extract_content(output_dir)
|
||||
self._chm_reader = rdr
|
||||
@ -32,13 +32,13 @@ class CHMInput(InputFormatPlugin):
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.ebooks.chm.metadata import get_metadata_from_reader
|
||||
from calibre.customize.ui import plugin_for_input_format
|
||||
self.opts = options
|
||||
|
||||
log.debug('Processing CHM...')
|
||||
with TemporaryDirectory('_chm2oeb') as tdir:
|
||||
html_input = plugin_for_input_format('html')
|
||||
for opt in html_input.options:
|
||||
setattr(options, opt.option.name, opt.recommended_value)
|
||||
options.input_encoding = 'utf-8'
|
||||
no_images = False #options.no_images
|
||||
chm_name = stream.name
|
||||
#chm_data = stream.read()
|
||||
@ -54,6 +54,7 @@ class CHMInput(InputFormatPlugin):
|
||||
|
||||
odi = options.debug_pipeline
|
||||
options.debug_pipeline = None
|
||||
options.input_encoding = 'utf-8'
|
||||
# try a custom conversion:
|
||||
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
|
||||
# try using html converter:
|
||||
|
@ -40,13 +40,14 @@ class CHMError(Exception):
|
||||
pass
|
||||
|
||||
class CHMReader(CHMFile):
|
||||
def __init__(self, input, log):
|
||||
def __init__(self, input, log, opts):
|
||||
CHMFile.__init__(self)
|
||||
if isinstance(input, unicode):
|
||||
input = input.encode(filesystem_encoding)
|
||||
if not self.LoadCHM(input):
|
||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||
self.log = log
|
||||
self.opts = opts
|
||||
self._sourcechm = input
|
||||
self._contents = None
|
||||
self._playorder = 0
|
||||
@ -151,6 +152,8 @@ class CHMReader(CHMFile):
|
||||
break
|
||||
|
||||
def _reformat(self, data, htmlpath):
|
||||
if self.opts.input_encoding:
|
||||
data = data.decode(self.opts.input_encoding)
|
||||
try:
|
||||
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||
soup = BeautifulSoup(data)
|
||||
|
@ -131,9 +131,12 @@ class PageProcessor(list): # {{{
|
||||
newsizey = int(newsizex / aspect)
|
||||
deltax = 0
|
||||
deltay = (SCRHEIGHT - newsizey) / 2
|
||||
wand.size = (newsizex, newsizey)
|
||||
wand.set_border_color(pw)
|
||||
wand.add_border(pw, deltax, deltay)
|
||||
if newsizex < 20000 and newsizey < 20000:
|
||||
# Too large and resizing fails, so better
|
||||
# to leave it as original size
|
||||
wand.size = (newsizex, newsizey)
|
||||
wand.set_border_color(pw)
|
||||
wand.add_border(pw, deltax, deltay)
|
||||
elif self.opts.wide:
|
||||
# Keep aspect and Use device height as scaled image width so landscape mode is clean
|
||||
aspect = float(sizex) / float(sizey)
|
||||
@ -152,11 +155,15 @@ class PageProcessor(list): # {{{
|
||||
newsizey = int(newsizex / aspect)
|
||||
deltax = 0
|
||||
deltay = (wscreeny - newsizey) / 2
|
||||
wand.size = (newsizex, newsizey)
|
||||
wand.set_border_color(pw)
|
||||
wand.add_border(pw, deltax, deltay)
|
||||
if newsizex < 20000 and newsizey < 20000:
|
||||
# Too large and resizing fails, so better
|
||||
# to leave it as original size
|
||||
wand.size = (newsizex, newsizey)
|
||||
wand.set_border_color(pw)
|
||||
wand.add_border(pw, deltax, deltay)
|
||||
else:
|
||||
wand.size = (SCRWIDTH, SCRHEIGHT)
|
||||
if SCRWIDTH < 20000 and SCRHEIGHT < 20000:
|
||||
wand.size = (SCRWIDTH, SCRHEIGHT)
|
||||
|
||||
if not self.opts.dont_sharpen:
|
||||
wand.sharpen(0.0, 1.0)
|
||||
|
@ -72,7 +72,7 @@ class FB2MLizer(object):
|
||||
|
||||
def clean_text(self, text):
|
||||
# Condense empty paragraphs into a line break.
|
||||
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<p><empty-line /></p>', text)
|
||||
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line />', text)
|
||||
# Remove empty paragraphs.
|
||||
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
|
||||
# Clean up paragraph endings.
|
||||
@ -101,9 +101,6 @@ class FB2MLizer(object):
|
||||
|
||||
def fb2_header(self):
|
||||
metadata = {}
|
||||
metadata['author_first'] = u''
|
||||
metadata['author_middle'] = u''
|
||||
metadata['author_last'] = u''
|
||||
metadata['title'] = self.oeb_book.metadata.title[0].value
|
||||
metadata['appname'] = __appname__
|
||||
metadata['version'] = __version__
|
||||
@ -115,16 +112,36 @@ class FB2MLizer(object):
|
||||
metadata['id'] = None
|
||||
metadata['cover'] = self.get_cover()
|
||||
|
||||
author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
|
||||
if len(author_parts) == 1:
|
||||
metadata['author_last'] = author_parts[0]
|
||||
elif len(author_parts) == 2:
|
||||
metadata['author_first'] = author_parts[0]
|
||||
metadata['author_last'] = author_parts[1]
|
||||
else:
|
||||
metadata['author_first'] = author_parts[0]
|
||||
metadata['author_middle'] = ' '.join(author_parts[1:-2])
|
||||
metadata['author_last'] = author_parts[-1]
|
||||
metadata['author'] = u''
|
||||
for auth in self.oeb_book.metadata.creator:
|
||||
author_first = u''
|
||||
author_middle = u''
|
||||
author_last = u''
|
||||
author_parts = auth.value.split(' ')
|
||||
if len(author_parts) == 1:
|
||||
author_last = author_parts[0]
|
||||
elif len(author_parts) == 2:
|
||||
author_first = author_parts[0]
|
||||
author_last = author_parts[1]
|
||||
else:
|
||||
author_first = author_parts[0]
|
||||
author_middle = ' '.join(author_parts[1:-1])
|
||||
author_last = author_parts[-1]
|
||||
metadata['author'] += '<author>'
|
||||
metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(author_first)
|
||||
if author_middle:
|
||||
metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(author_middle)
|
||||
metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
|
||||
metadata['author'] += '</author>'
|
||||
if not metadata['author']:
|
||||
metadata['author'] = u'<author><first-name></first-name><last-name><last-name></author>'
|
||||
|
||||
metadata['sequence'] = u''
|
||||
if self.oeb_book.metadata.series:
|
||||
index = '1'
|
||||
if self.oeb_book.metadata.series_index:
|
||||
index = self.oeb_book.metadata.series_index[0]
|
||||
metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (prepare_string_for_xml(u'%s' % self.oeb_book.metadata.series[0]), index)
|
||||
|
||||
identifiers = self.oeb_book.metadata['identifier']
|
||||
for x in identifiers:
|
||||
@ -136,28 +153,21 @@ class FB2MLizer(object):
|
||||
metadata['id'] = str(uuid.uuid4())
|
||||
|
||||
for key, value in metadata.items():
|
||||
if not key == 'cover':
|
||||
if key not in ('author', 'cover', 'sequence'):
|
||||
metadata[key] = prepare_string_for_xml(value)
|
||||
|
||||
return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
|
||||
'<description>' \
|
||||
'<title-info>' \
|
||||
'<genre>antique</genre>' \
|
||||
'<author>' \
|
||||
'<first-name>%(author_first)s</first-name>' \
|
||||
'<middle-name>%(author_middle)s</middle-name>' \
|
||||
'<last-name>%(author_last)s</last-name>' \
|
||||
'</author>' \
|
||||
'%(author)s' \
|
||||
'<book-title>%(title)s</book-title>' \
|
||||
'%(cover)s' \
|
||||
'<lang>%(lang)s</lang>' \
|
||||
'%(sequence)s' \
|
||||
'</title-info>' \
|
||||
'<document-info>' \
|
||||
'<author>' \
|
||||
'<first-name></first-name>' \
|
||||
'<middle-name></middle-name>' \
|
||||
'<last-name></last-name>' \
|
||||
'</author>' \
|
||||
'%(author)s' \
|
||||
'<program-used>%(appname)s %(version)s</program-used>' \
|
||||
'<date>%(date)s</date>' \
|
||||
'<id>%(id)s</id>' \
|
||||
|
@ -23,8 +23,9 @@ cover_url_cache = {}
|
||||
cache_lock = RLock()
|
||||
|
||||
def find_asin(br, isbn):
|
||||
q = 'http://www.amazon.com/s?field-keywords='+isbn
|
||||
raw = br.open_novisit(q).read()
|
||||
q = 'http://www.amazon.com/s/?search-alias=aps&field-keywords='+isbn
|
||||
res = br.open_novisit(q)
|
||||
raw = res.read()
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
resolve_entities=True)[0]
|
||||
root = html.fromstring(raw)
|
||||
@ -151,6 +152,8 @@ def get_metadata(br, asin, mi):
|
||||
root = soupparser.fromstring(raw)
|
||||
except:
|
||||
return False
|
||||
if root.xpath('//*[@id="errorMessage"]'):
|
||||
return False
|
||||
ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
|
||||
if ratings:
|
||||
pat = re.compile(r'([0-9.]+) out of (\d+) stars')
|
||||
@ -191,6 +194,7 @@ def main(args=sys.argv):
|
||||
tdir = tempfile.gettempdir()
|
||||
br = browser()
|
||||
for title, isbn in [
|
||||
('The Heroes', '9780316044981'), # Test find_asin
|
||||
('Learning Python', '8324616489'), # Test xisbn
|
||||
('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
|
||||
# Random tests
|
||||
@ -207,8 +211,12 @@ def main(args=sys.argv):
|
||||
|
||||
#import time
|
||||
#st = time.time()
|
||||
print get_social_metadata(title, None, None, isbn)
|
||||
mi = get_social_metadata(title, None, None, isbn)
|
||||
if not mi.comments:
|
||||
print 'Failed to downlaod social metadata for', title
|
||||
return 1
|
||||
#print '\n\n', time.time() - st, '\n\n'
|
||||
print '\n'
|
||||
|
||||
return 0
|
||||
|
||||
|
@ -130,7 +130,7 @@ class Metadata(object):
|
||||
self.set_identifiers(val)
|
||||
elif field in STANDARD_METADATA_FIELDS:
|
||||
if val is None:
|
||||
val = NULL_VALUES.get(field, None)
|
||||
val = copy.copy(NULL_VALUES.get(field, None))
|
||||
_data[field] = val
|
||||
elif field in _data['user_metadata'].iterkeys():
|
||||
_data['user_metadata'][field]['#value#'] = val
|
||||
|
@ -74,6 +74,8 @@ class HeadRequest(mechanize.Request):
|
||||
class OpenLibraryCovers(CoverDownload): # {{{
|
||||
'Download covers from openlibrary.org'
|
||||
|
||||
# See http://openlibrary.org/dev/docs/api/covers
|
||||
|
||||
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
|
||||
name = 'openlibrary.org covers'
|
||||
description = _('Download covers from openlibrary.org')
|
||||
@ -82,7 +84,8 @@ class OpenLibraryCovers(CoverDownload): # {{{
|
||||
def has_cover(self, mi, ans, timeout=5.):
|
||||
if not mi.isbn:
|
||||
return False
|
||||
br = browser()
|
||||
from calibre.ebooks.metadata.library_thing import get_browser
|
||||
br = get_browser()
|
||||
br.set_handle_redirect(False)
|
||||
try:
|
||||
br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
|
||||
@ -98,7 +101,8 @@ class OpenLibraryCovers(CoverDownload): # {{{
|
||||
def get_covers(self, mi, result_queue, abort, timeout=5.):
|
||||
if not mi.isbn:
|
||||
return
|
||||
br = browser()
|
||||
from calibre.ebooks.metadata.library_thing import get_browser
|
||||
br = get_browser()
|
||||
try:
|
||||
ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
|
||||
result_queue.put((True, ans, 'jpg', self.name))
|
||||
@ -137,6 +141,8 @@ class AmazonCovers(CoverDownload): # {{{
|
||||
br = browser()
|
||||
try:
|
||||
url = get_cover_url(mi.isbn, br)
|
||||
if url is None:
|
||||
raise ValueError('No cover found for ISBN: %s'%mi.isbn)
|
||||
cover_data = br.open_novisit(url).read()
|
||||
result_queue.put((True, cover_data, 'jpg', self.name))
|
||||
except Exception, e:
|
||||
|
@ -908,6 +908,19 @@ class Manifest(object):
|
||||
pass
|
||||
data = first_pass(data)
|
||||
|
||||
if data.tag == 'HTML':
|
||||
# Lower case all tag and attribute names
|
||||
data.tag = data.tag.lower()
|
||||
for x in data.iterdescendants():
|
||||
try:
|
||||
x.tag = x.tag.lower()
|
||||
for key, val in list(x.attrib.iteritems()):
|
||||
del x.attrib[key]
|
||||
key = key.lower()
|
||||
x.attrib[key] = val
|
||||
except:
|
||||
pass
|
||||
|
||||
# Handle weird (non-HTML/fragment) files
|
||||
if barename(data.tag) != 'html':
|
||||
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)
|
||||
|
@ -8,11 +8,7 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import os
|
||||
import itertools
|
||||
import re
|
||||
import logging
|
||||
import copy
|
||||
import os, itertools, re, logging, copy, unicodedata
|
||||
from weakref import WeakKeyDictionary
|
||||
from xml.dom import SyntaxErr as CSSSyntaxError
|
||||
import cssutils
|
||||
@ -234,8 +230,18 @@ class Stylizer(object):
|
||||
for elem in matches:
|
||||
for x in elem.iter():
|
||||
if x.text:
|
||||
span = E.span(x.text[0])
|
||||
span.tail = x.text[1:]
|
||||
punctuation_chars = []
|
||||
text = unicode(x.text)
|
||||
while text:
|
||||
if not unicodedata.category(text[0]).startswith('P'):
|
||||
break
|
||||
punctuation_chars.append(text[0])
|
||||
text = text[1:]
|
||||
|
||||
special_text = u''.join(punctuation_chars) + \
|
||||
(text[0] if text else u'')
|
||||
span = E.span(special_text)
|
||||
span.tail = text[1:]
|
||||
x.text = None
|
||||
x.insert(0, span)
|
||||
self.style(span)._update_cssdict(cssdict)
|
||||
|
@ -13,6 +13,7 @@ from urlparse import urlparse
|
||||
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
|
||||
from calibre.ebooks import ConversionError
|
||||
from calibre.utils.ordered_dict import OrderedDict
|
||||
|
||||
def XPath(x):
|
||||
try:
|
||||
@ -95,10 +96,8 @@ class DetectStructure(object):
|
||||
self.log.exception('Failed to mark chapter')
|
||||
|
||||
def create_level_based_toc(self):
|
||||
if self.opts.level1_toc is None:
|
||||
return
|
||||
for item in self.oeb.spine:
|
||||
self.add_leveled_toc_items(item)
|
||||
if self.opts.level1_toc is not None:
|
||||
self.add_leveled_toc_items()
|
||||
|
||||
def create_toc_from_chapters(self):
|
||||
counter = self.oeb.toc.next_play_order()
|
||||
@ -145,49 +144,57 @@ class DetectStructure(object):
|
||||
return text, href
|
||||
|
||||
|
||||
def add_leveled_toc_items(self, item):
|
||||
level1 = XPath(self.opts.level1_toc)(item.data)
|
||||
level1_order = []
|
||||
document = item
|
||||
|
||||
def add_leveled_toc_items(self):
|
||||
added = OrderedDict()
|
||||
added2 = OrderedDict()
|
||||
counter = 1
|
||||
if level1:
|
||||
added = {}
|
||||
for elem in level1:
|
||||
for document in self.oeb.spine:
|
||||
previous_level1 = list(added.itervalues())[-1] if added else None
|
||||
previous_level2 = list(added2.itervalues())[-1] if added2 else None
|
||||
|
||||
for elem in XPath(self.opts.level1_toc)(document.data):
|
||||
text, _href = self.elem_to_link(document, elem, counter)
|
||||
counter += 1
|
||||
if text:
|
||||
node = self.oeb.toc.add(text, _href,
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
level1_order.append(node)
|
||||
added[elem] = node
|
||||
#node.add(_('Top'), _href)
|
||||
if self.opts.level2_toc is not None:
|
||||
added2 = {}
|
||||
level2 = list(XPath(self.opts.level2_toc)(document.data))
|
||||
for elem in level2:
|
||||
|
||||
if self.opts.level2_toc is not None and added:
|
||||
for elem in XPath(self.opts.level2_toc)(document.data):
|
||||
level1 = None
|
||||
for item in document.data.iterdescendants():
|
||||
if item in added.keys():
|
||||
if item in added:
|
||||
level1 = added[item]
|
||||
elif item == elem and level1 is not None:
|
||||
elif item == elem:
|
||||
if level1 is None:
|
||||
if previous_level1 is None:
|
||||
break
|
||||
level1 = previous_level1
|
||||
text, _href = self.elem_to_link(document, elem, counter)
|
||||
counter += 1
|
||||
if text:
|
||||
added2[elem] = level1.add(text, _href,
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
if self.opts.level3_toc is not None:
|
||||
level3 = list(XPath(self.opts.level3_toc)(document.data))
|
||||
for elem in level3:
|
||||
break
|
||||
|
||||
if self.opts.level3_toc is not None and added2:
|
||||
for elem in XPath(self.opts.level3_toc)(document.data):
|
||||
level2 = None
|
||||
for item in document.data.iterdescendants():
|
||||
if item in added2.keys():
|
||||
if item in added2:
|
||||
level2 = added2[item]
|
||||
elif item == elem and level2 is not None:
|
||||
elif item == elem:
|
||||
if level2 is None:
|
||||
if previous_level2 is None:
|
||||
break
|
||||
level2 = previous_level2
|
||||
text, _href = \
|
||||
self.elem_to_link(document, elem, counter)
|
||||
counter += 1
|
||||
if text:
|
||||
level2.add(text, _href,
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
break
|
||||
|
||||
|
@ -46,7 +46,8 @@ def get_pdf_printer(opts, for_comic=False):
|
||||
printer = QPrinter(QPrinter.HighResolution)
|
||||
custom_size = get_custom_size(opts)
|
||||
|
||||
if opts.output_profile.short_name == 'default':
|
||||
if opts.output_profile.short_name == 'default' or \
|
||||
opts.output_profile.width > 10000:
|
||||
if custom_size is None:
|
||||
printer.setPaperSize(paper_size(opts.paper_size))
|
||||
else:
|
||||
|
@ -46,7 +46,8 @@ class Tokenize:
|
||||
|
||||
def __remove_uc_chars(self, startchar, token):
|
||||
for i in xrange(startchar, len(token)):
|
||||
if token[i] == " ":
|
||||
#handle the case of an uc char with a terminating blank before ansi char
|
||||
if token[i] == " " and self.__uc_char:
|
||||
continue
|
||||
elif self.__uc_char:
|
||||
self.__uc_char -= 1
|
||||
|
@ -75,15 +75,20 @@ class SNBFile:
|
||||
for i in range(self.plainBlock):
|
||||
bzdc = bz2.BZ2Decompressor()
|
||||
if (i < self.plainBlock - 1):
|
||||
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset;
|
||||
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset
|
||||
else:
|
||||
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset;
|
||||
snbFile.seek(self.blocks[self.binBlock + i].Offset);
|
||||
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset
|
||||
snbFile.seek(self.blocks[self.binBlock + i].Offset)
|
||||
try:
|
||||
data = snbFile.read(bSize)
|
||||
uncompressedData += bzdc.decompress(data)
|
||||
if len(data) < 32768:
|
||||
uncompressedData += bzdc.decompress(data)
|
||||
else:
|
||||
uncompressedData += data
|
||||
except Exception, e:
|
||||
print e
|
||||
if len(uncompressedData) != self.plainStreamSizeUncompressed:
|
||||
raise Exception()
|
||||
f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
|
||||
plainPos += f.fileSize
|
||||
elif f.attr & 0x01000000 == 0x01000000:
|
||||
|
@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
|
||||
name = 'TXT Input'
|
||||
author = 'John Schember'
|
||||
description = 'Convert TXT files to HTML'
|
||||
file_types = set(['txt', 'txtz'])
|
||||
file_types = set(['txt', 'txtz', 'text'])
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='paragraph_type', recommended_value='auto',
|
||||
@ -65,7 +65,6 @@ class TXTInput(InputFormatPlugin):
|
||||
txt = ''
|
||||
log.debug('Reading text from file...')
|
||||
length = 0
|
||||
# [(u'path', mime),]
|
||||
|
||||
# Extract content from zip archive.
|
||||
if file_ext == 'txtz':
|
||||
@ -73,7 +72,7 @@ class TXTInput(InputFormatPlugin):
|
||||
zf.extractall('.')
|
||||
|
||||
for x in walk('.'):
|
||||
if os.path.splitext(x)[1].lower() == '.txt':
|
||||
if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
|
||||
with open(x, 'rb') as tf:
|
||||
txt += tf.read() + '\n\n'
|
||||
else:
|
||||
|
@ -340,6 +340,7 @@ class FileIconProvider(QFileIconProvider):
|
||||
'rar' : 'rar',
|
||||
'zip' : 'zip',
|
||||
'txt' : 'txt',
|
||||
'text' : 'txt',
|
||||
'prc' : 'mobi',
|
||||
'azw' : 'mobi',
|
||||
'mobi' : 'mobi',
|
||||
|
@ -204,15 +204,29 @@ class AddAction(InterfaceAction):
|
||||
to_device = self.gui.stack.currentIndex() != 0
|
||||
self._add_books(paths, to_device)
|
||||
|
||||
def files_dropped_on_book(self, event, paths):
|
||||
def remote_file_dropped_on_book(self, url, fname):
    '''
    Handle a remote file that was dropped on the current book.

    Nothing happens unless the library view is the active view and it has
    a valid current index. Otherwise the file is fetched through a
    ``DownloadDialog`` and, if the dialog reports no error, the downloaded
    file is passed on to :meth:`files_dropped_on_book`.

    :param url: Forwarded unchanged to ``DownloadDialog``
    :param fname: Forwarded unchanged to ``DownloadDialog``
    '''
    gui = self.gui
    if gui.current_view() is not gui.library_view:
        return

    library = gui.library_view
    db = library.model().db
    index = library.currentIndex()
    if not index.isValid():
        return

    # The book id is looked up before the download starts.
    # NOTE(review): presumably so the drop still targets this book if the
    # selection changes while the download dialog is running -- confirm.
    book_id = db.id(index.row())

    from calibre.gui2.dnd import DownloadDialog
    dialog = DownloadDialog(url, fname, gui)
    dialog.start_download()
    if dialog.err is not None:
        return
    # No drop event exists for the downloaded file, hence event=None
    self.files_dropped_on_book(None, [dialog.fpath], cid=book_id)
|
||||
|
||||
def files_dropped_on_book(self, event, paths, cid=None):
|
||||
accept = False
|
||||
if self.gui.current_view() is not self.gui.library_view:
|
||||
return
|
||||
db = self.gui.library_view.model().db
|
||||
cover_changed = False
|
||||
current_idx = self.gui.library_view.currentIndex()
|
||||
if not current_idx.isValid(): return
|
||||
cid = db.id(current_idx.row())
|
||||
if cid is None:
|
||||
if not current_idx.isValid(): return
|
||||
cid = db.id(current_idx.row()) if cid is None else cid
|
||||
for path in paths:
|
||||
ext = os.path.splitext(path)[1].lower()
|
||||
if ext:
|
||||
@ -227,8 +241,9 @@ class AddAction(InterfaceAction):
|
||||
elif ext in BOOK_EXTENSIONS:
|
||||
db.add_format_with_hooks(cid, ext, path, index_is_id=True)
|
||||
accept = True
|
||||
if accept:
|
||||
if accept and event is not None:
|
||||
event.accept()
|
||||
if current_idx.isValid():
|
||||
self.gui.library_view.model().current_changed(current_idx, current_idx)
|
||||
if cover_changed:
|
||||
if self.gui.cover_flow:
|
||||
|
@ -11,7 +11,6 @@ from PyQt4.Qt import QWizard, QWizardPage, QIcon, QPixmap, Qt, QThread, \
|
||||
pyqtSignal
|
||||
|
||||
from calibre.gui2 import error_dialog, choose_dir, gprefs
|
||||
from calibre.constants import filesystem_encoding
|
||||
from calibre.library.add_to_library import find_folders_under, \
|
||||
find_books_in_folder, hash_merge_format_collections
|
||||
|
||||
@ -122,20 +121,19 @@ class WelcomePage(WizardPage, WelcomeWidget):
|
||||
x = unicode(self.opt_root_folder.text()).strip()
|
||||
if not x:
|
||||
return None
|
||||
return os.path.abspath(x.encode(filesystem_encoding))
|
||||
return os.path.abspath(x)
|
||||
|
||||
def get_one_per_folder(self):
    '''Return the checked state of the ``opt_one_per_folder`` widget.'''
    checkbox = self.opt_one_per_folder
    return checkbox.isChecked()
|
||||
|
||||
def validatePage(self):
|
||||
x = self.get_root_folder()
|
||||
xu = x.decode(filesystem_encoding)
|
||||
if x and os.access(x, os.R_OK) and os.path.isdir(x):
|
||||
gprefs['add wizard root folder'] = xu
|
||||
gprefs['add wizard root folder'] = x
|
||||
gprefs['add wizard one per folder'] = self.get_one_per_folder()
|
||||
return True
|
||||
error_dialog(self, _('Invalid root folder'),
|
||||
xu + _('is not a valid root folder'), show=True)
|
||||
x + _('is not a valid root folder'), show=True)
|
||||
return False
|
||||
|
||||
# }}}
|
||||
|
@ -5,7 +5,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, collections, sys
|
||||
import collections, sys
|
||||
from Queue import Queue
|
||||
|
||||
from PyQt4.Qt import QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl, \
|
||||
@ -14,7 +14,8 @@ from PyQt4.Qt import QPixmap, QSize, QWidget, Qt, pyqtSignal, QUrl, \
|
||||
from PyQt4.QtWebKit import QWebView
|
||||
|
||||
from calibre import fit_image, prepare_string_for_xml
|
||||
from calibre.gui2.widgets import IMAGE_EXTENSIONS
|
||||
from calibre.gui2.dnd import dnd_has_image, dnd_get_image, dnd_get_files, \
|
||||
IMAGE_EXTENSIONS, dnd_has_extension
|
||||
from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.library.comments import comments_to_html
|
||||
@ -165,11 +166,12 @@ class CoverView(QWidget): # {{{
|
||||
def copy_to_clipboard(self):
    '''Put ``self.pixmap`` on the clipboard of the running QApplication.'''
    clipboard = QApplication.instance().clipboard()
    clipboard.setPixmap(self.pixmap)
|
||||
|
||||
def paste_from_clipboard(self):
|
||||
cb = QApplication.instance().clipboard()
|
||||
pmap = cb.pixmap()
|
||||
if pmap.isNull() and cb.supportsSelection():
|
||||
pmap = cb.pixmap(cb.Selection)
|
||||
def paste_from_clipboard(self, pmap=None):
|
||||
if not isinstance(pmap, QPixmap):
|
||||
cb = QApplication.instance().clipboard()
|
||||
pmap = cb.pixmap()
|
||||
if pmap.isNull() and cb.supportsSelection():
|
||||
pmap = cb.pixmap(cb.Selection)
|
||||
if not pmap.isNull():
|
||||
self.pixmap = pmap
|
||||
self.do_layout()
|
||||
@ -226,6 +228,7 @@ class BookInfo(QWebView):
|
||||
self._link_clicked = False
|
||||
self.setAttribute(Qt.WA_OpaquePaintEvent, False)
|
||||
palette = self.palette()
|
||||
self.setAcceptDrops(False)
|
||||
palette.setBrush(QPalette.Base, Qt.transparent)
|
||||
self.page().setPalette(palette)
|
||||
|
||||
@ -388,36 +391,50 @@ class BookDetails(QWidget): # {{{
|
||||
show_book_info = pyqtSignal()
|
||||
open_containing_folder = pyqtSignal(int)
|
||||
view_specific_format = pyqtSignal(int, object)
|
||||
|
||||
# Drag 'n drop {{{
|
||||
DROPABBLE_EXTENSIONS = IMAGE_EXTENSIONS+BOOK_EXTENSIONS
|
||||
remote_file_dropped = pyqtSignal(object, object)
|
||||
files_dropped = pyqtSignal(object, object)
|
||||
cover_changed = pyqtSignal(object, object)
|
||||
|
||||
# application/x-moz-file-promise-url
|
||||
@classmethod
|
||||
def paths_from_event(cls, event):
|
||||
'''
|
||||
Accept a drop event and return a list of paths that can be read from
|
||||
and represent files with extensions.
|
||||
'''
|
||||
if event.mimeData().hasFormat('text/uri-list'):
|
||||
urls = [unicode(u.toLocalFile()) for u in event.mimeData().urls()]
|
||||
urls = [u for u in urls if os.path.splitext(u)[1] and os.access(u, os.R_OK)]
|
||||
return [u for u in urls if os.path.splitext(u)[1][1:].lower() in cls.DROPABBLE_EXTENSIONS]
|
||||
# Drag 'n drop {{{
|
||||
DROPABBLE_EXTENSIONS = IMAGE_EXTENSIONS+BOOK_EXTENSIONS
|
||||
|
||||
def dragEnterEvent(self, event):
|
||||
if int(event.possibleActions() & Qt.CopyAction) + \
|
||||
int(event.possibleActions() & Qt.MoveAction) == 0:
|
||||
return
|
||||
paths = self.paths_from_event(event)
|
||||
if paths:
|
||||
md = event.mimeData()
|
||||
if dnd_has_extension(md, self.DROPABBLE_EXTENSIONS) or \
|
||||
dnd_has_image(md):
|
||||
event.acceptProposedAction()
|
||||
|
||||
def dropEvent(self, event):
|
||||
paths = self.paths_from_event(event)
|
||||
event.setDropAction(Qt.CopyAction)
|
||||
self.files_dropped.emit(event, paths)
|
||||
md = event.mimeData()
|
||||
|
||||
x, y = dnd_get_image(md)
|
||||
if x is not None:
|
||||
# We have an image, set cover
|
||||
event.accept()
|
||||
if y is None:
|
||||
# Local image
|
||||
self.cover_view.paste_from_clipboard(x)
|
||||
else:
|
||||
self.remote_file_dropped.emit(x, y)
|
||||
# We do not support setting cover *and* adding formats for
|
||||
# a remote drop, anyway, so return
|
||||
return
|
||||
|
||||
# Now look for ebook files
|
||||
urls, filenames = dnd_get_files(md, BOOK_EXTENSIONS)
|
||||
if not urls:
|
||||
# Nothing found
|
||||
return
|
||||
|
||||
if not filenames:
|
||||
# Local files
|
||||
self.files_dropped.emit(event, urls)
|
||||
else:
|
||||
# Remote files, use the first file
|
||||
self.remote_file_dropped.emit(urls[0], filenames[0])
|
||||
event.accept()
|
||||
|
||||
|
||||
def dragMoveEvent(self, event):
    '''Accept the proposed action of every drag-move event received.'''
    accept = event.acceptProposedAction
    accept()
|
||||
|
@ -43,6 +43,9 @@
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="sizeAdjustPolicy">
|
||||
<enum>QComboBox::AdjustToMinimumContentsLengthWithIcon</enum>
|
||||
</property>
|
||||
<property name="minimumContentsLength">
|
||||
<number>30</number>
|
||||
</property>
|
||||
|
@ -5,7 +5,6 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys
|
||||
from functools import partial
|
||||
|
||||
from PyQt4.Qt import QComboBox, QLabel, QSpinBox, QDoubleSpinBox, QDateEdit, \
|
||||
@ -85,7 +84,7 @@ class Int(Base):
|
||||
self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent),
|
||||
QSpinBox(parent)]
|
||||
w = self.widgets[1]
|
||||
w.setRange(-100, sys.maxint)
|
||||
w.setRange(-100, 100000000)
|
||||
w.setSpecialValueText(_('Undefined'))
|
||||
w.setSingleStep(1)
|
||||
|
||||
@ -108,7 +107,7 @@ class Float(Int):
|
||||
self.widgets = [QLabel('&'+self.col_metadata['name']+':', parent),
|
||||
QDoubleSpinBox(parent)]
|
||||
w = self.widgets[1]
|
||||
w.setRange(-100., float(sys.maxint))
|
||||
w.setRange(-100., float(100000000))
|
||||
w.setDecimals(2)
|
||||
w.setSpecialValueText(_('Undefined'))
|
||||
w.setSingleStep(1)
|
||||
@ -289,7 +288,7 @@ class Series(Base):
|
||||
|
||||
self.widgets.append(QLabel('&'+self.col_metadata['name']+_(' index:'), parent))
|
||||
w = QDoubleSpinBox(parent)
|
||||
w.setRange(-100., float(sys.maxint))
|
||||
w.setRange(-100., float(100000000))
|
||||
w.setDecimals(2)
|
||||
w.setSpecialValueText(_('Undefined'))
|
||||
w.setSingleStep(1)
|
||||
@ -595,7 +594,7 @@ class BulkInt(BulkBase):
|
||||
|
||||
def setup_ui(self, parent):
|
||||
self.make_widgets(parent, QSpinBox)
|
||||
self.main_widget.setRange(-100, sys.maxint)
|
||||
self.main_widget.setRange(-100, 100000000)
|
||||
self.main_widget.setSpecialValueText(_('Undefined'))
|
||||
self.main_widget.setSingleStep(1)
|
||||
|
||||
@ -617,7 +616,7 @@ class BulkFloat(BulkInt):
|
||||
|
||||
def setup_ui(self, parent):
|
||||
self.make_widgets(parent, QDoubleSpinBox)
|
||||
self.main_widget.setRange(-100., float(sys.maxint))
|
||||
self.main_widget.setRange(-100., float(100000000))
|
||||
self.main_widget.setDecimals(2)
|
||||
self.main_widget.setSpecialValueText(_('Undefined'))
|
||||
self.main_widget.setSingleStep(1)
|
||||
@ -795,6 +794,7 @@ class BulkEnumeration(BulkBase, Enumeration):
|
||||
return value
|
||||
|
||||
def setup_ui(self, parent):
|
||||
self.parent = parent
|
||||
self.make_widgets(parent, QComboBox)
|
||||
vals = self.col_metadata['display']['enum_values']
|
||||
self.main_widget.blockSignals(True)
|
||||
|
@ -1160,6 +1160,14 @@ class DeviceMixin(object): # {{{
|
||||
), bad)
|
||||
d.exec_()
|
||||
|
||||
def upload_dirtied_booklists(self):
    '''
    Upload metadata to device.

    Reads the ``plugboards`` preference of the library database (an empty
    dict when it is not set) and passes it, together with the current
    booklists, to the device manager's ``sync_booklists``.
    '''
    prefs = self.library_view.model().db.prefs
    plugboards = prefs.get('plugboards', {})
    sync = self.device_manager.sync_booklists
    # The completion callback is an identity function wrapped in Dispatcher
    sync(Dispatcher(lambda result: result), self.booklists(), plugboards)
|
||||
|
||||
def upload_booklists(self):
|
||||
'''
|
||||
Upload metadata to device.
|
||||
|
61
src/calibre/gui2/dialogs/choose_plugin_toolbars.py
Normal file
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
|
||||
from PyQt4.Qt import QDialog, QVBoxLayout, QLabel, QDialogButtonBox, \
|
||||
QListWidget, QAbstractItemView
|
||||
from PyQt4 import QtGui
|
||||
|
||||
class ChoosePluginToolbarsDialog(QDialog):
    '''
    Dialog asking the user which toolbars and/or menus a plugin should be
    added to.

    ``locations`` is iterated as ``(key, text)`` pairs: each ``text`` is
    shown as one row of a multi-selection list, and
    :meth:`selected_locations` returns the entries of ``locations`` whose
    rows are selected.
    '''

    def __init__(self, parent, plugin, locations):
        QDialog.__init__(self, parent)
        self.locations = locations

        plugin_name = plugin.name
        self.setWindowTitle(
            _('Add "%s" to toolbars or menus')%plugin_name)

        layout = QVBoxLayout(self)
        self._layout = layout
        self.setLayout(layout)

        # Header explaining what is being chosen
        header = QLabel(
            _('Select the toolbars and/or menus to add <b>%s</b> to:') %
            plugin_name)
        self._header_label = header
        layout.addWidget(header)

        # One row per location, in the same order as ``locations`` so that
        # row numbers can be used to index back into it
        picker = QListWidget(self)
        self._locations_list = picker
        picker.setSelectionMode(QAbstractItemView.MultiSelection)
        policy = QtGui.QSizePolicy(QtGui.QSizePolicy.Preferred,
                QtGui.QSizePolicy.Minimum)
        policy.setHorizontalStretch(0)
        policy.setVerticalStretch(0)
        picker.setSizePolicy(policy)
        for _key, label in locations:
            picker.addItem(label)
        layout.addWidget(picker)

        # Footer pointing at the place where locations can be changed later
        footer = QLabel(
            _('You can also customise the plugin locations '
              'using <b>Preferences -> Customise the toolbar</b>'))
        self._footer_label = footer
        layout.addWidget(footer)

        buttons = QDialogButtonBox(QDialogButtonBox.Ok |
                QDialogButtonBox.Cancel)
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)
        layout.addWidget(buttons)
        self.resize(self.sizeHint())

    def selected_locations(self):
        '''
        Return the entries of ``self.locations`` for the rows currently
        selected in the list widget.
        '''
        rows = self._locations_list.selectionModel().selectedRows()
        return [self.locations[index.row()] for index in rows]
|
||||
|
@ -7,7 +7,7 @@ import re, os, inspect
|
||||
|
||||
from PyQt4.Qt import Qt, QDialog, QGridLayout, QVBoxLayout, QFont, QLabel, \
|
||||
pyqtSignal, QDialogButtonBox, QInputDialog, QLineEdit, \
|
||||
QDate
|
||||
QDate, QCompleter
|
||||
|
||||
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
|
||||
from calibre.gui2.dialogs.tag_editor import TagEditor
|
||||
@ -364,7 +364,8 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
(fm[f]['datatype'] in ['text', 'series', 'enumeration']
|
||||
and fm[f].get('search_terms', None)
|
||||
and f not in ['formats', 'ondevice']) or
|
||||
fm[f]['datatype'] in ['int', 'float', 'bool'] ):
|
||||
(fm[f]['datatype'] in ['int', 'float', 'bool'] and
|
||||
f not in ['id'])):
|
||||
self.all_fields.append(f)
|
||||
self.writable_fields.append(f)
|
||||
if fm[f]['datatype'] == 'composite':
|
||||
@ -393,6 +394,14 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
self.book_1_text.setObjectName(name)
|
||||
self.testgrid.addWidget(w, i+offset, 2, 1, 1)
|
||||
|
||||
ident_types = sorted(self.db.get_all_identifier_types(), key=sort_key)
|
||||
self.s_r_dst_ident.setCompleter(QCompleter(ident_types))
|
||||
try:
|
||||
self.s_r_dst_ident.setPlaceholderText(_('Enter an identifier type'))
|
||||
except:
|
||||
pass
|
||||
self.s_r_src_ident.addItems(ident_types)
|
||||
|
||||
self.main_heading = _(
|
||||
'<b>You can destroy your library using this feature.</b> '
|
||||
'Changes are permanent. There is no undo function. '
|
||||
@ -449,6 +458,8 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
self.test_text.editTextChanged[str].connect(self.s_r_paint_results)
|
||||
self.comma_separated.stateChanged.connect(self.s_r_paint_results)
|
||||
self.case_sensitive.stateChanged.connect(self.s_r_paint_results)
|
||||
self.s_r_src_ident.currentIndexChanged[int].connect(self.s_r_paint_results)
|
||||
self.s_r_dst_ident.textChanged.connect(self.s_r_paint_results)
|
||||
self.s_r_template.lost_focus.connect(self.s_r_template_changed)
|
||||
self.central_widget.setCurrentIndex(0)
|
||||
|
||||
@ -471,6 +482,8 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
self.query_field.addItems(sorted([q for q in self.queries], key=sort_key))
|
||||
self.query_field.currentIndexChanged[str].connect(self.s_r_query_change)
|
||||
self.query_field.setCurrentIndex(0)
|
||||
self.search_field.setCurrentIndex(0)
|
||||
self.s_r_search_field_changed(0)
|
||||
|
||||
def s_r_sf_itemdata(self, idx):
|
||||
if idx is None:
|
||||
@ -495,6 +508,13 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
val = mi.get(field, None)
|
||||
if isinstance(val, (int, float, bool)):
|
||||
val = str(val)
|
||||
elif fm['is_csp']:
|
||||
# convert the csp dict into a list
|
||||
id_type = unicode(self.s_r_src_ident.currentText())
|
||||
if id_type:
|
||||
val = [val.get(id_type, '')]
|
||||
else:
|
||||
val = [u'%s:%s'%(t[0], t[1]) for t in val.iteritems()]
|
||||
if val is None:
|
||||
val = [] if fm['is_multiple'] else ['']
|
||||
elif not fm['is_multiple']:
|
||||
@ -512,12 +532,17 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
self.s_r_search_field_changed(self.search_field.currentIndex())
|
||||
|
||||
def s_r_search_field_changed(self, idx):
|
||||
if self.search_mode.currentIndex() != 0 and idx == 1: # Template
|
||||
self.s_r_template.setVisible(False)
|
||||
self.template_label.setVisible(False)
|
||||
self.s_r_src_ident_label.setVisible(False)
|
||||
self.s_r_src_ident.setVisible(False)
|
||||
if idx == 1: # Template
|
||||
self.s_r_template.setVisible(True)
|
||||
self.template_label.setVisible(True)
|
||||
else:
|
||||
self.s_r_template.setVisible(False)
|
||||
self.template_label.setVisible(False)
|
||||
elif self.s_r_sf_itemdata(idx) == 'identifiers':
|
||||
self.s_r_src_ident_label.setVisible(True)
|
||||
self.s_r_src_ident.setVisible(True)
|
||||
|
||||
for i in range(0, self.s_r_number_of_books):
|
||||
w = getattr(self, 'book_%d_text'%(i+1))
|
||||
mi = self.db.get_metadata(self.ids[i], index_is_id=True)
|
||||
@ -535,10 +560,15 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
self.s_r_paint_results(None)
|
||||
|
||||
def s_r_destination_field_changed(self, idx):
|
||||
self.s_r_dst_ident_label.setVisible(False)
|
||||
self.s_r_dst_ident.setVisible(False)
|
||||
txt = self.s_r_df_itemdata(idx)
|
||||
if not txt:
|
||||
txt = self.s_r_sf_itemdata(None)
|
||||
if txt and txt in self.writable_fields:
|
||||
if txt == 'identifiers':
|
||||
self.s_r_dst_ident_label.setVisible(True)
|
||||
self.s_r_dst_ident.setVisible(True)
|
||||
self.destination_field_fm = self.db.metadata_for_field(txt)
|
||||
self.s_r_paint_results(None)
|
||||
|
||||
@ -617,6 +647,10 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
dest = src
|
||||
dest_mode = self.replace_mode.currentIndex()
|
||||
|
||||
if self.destination_field_fm['is_csp']:
|
||||
if not unicode(self.s_r_dst_ident.text()):
|
||||
raise Exception(_('You must specify a destination identifier type'))
|
||||
|
||||
if self.destination_field_fm['is_multiple']:
|
||||
if self.comma_separated.isChecked():
|
||||
if dest == 'authors':
|
||||
@ -635,6 +669,13 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
|
||||
if dest_mode != 0:
|
||||
dest_val = mi.get(dest, '')
|
||||
if self.db.metadata_for_field(dest)['is_csp']:
|
||||
dst_id_type = unicode(self.s_r_dst_ident.text())
|
||||
if dst_id_type:
|
||||
dest_val = [dest_val.get(dst_id_type, '')]
|
||||
else:
|
||||
# convert the csp dict into a list
|
||||
dest_val = [u'%s:%s'%(t[0], t[1]) for t in dest_val.iteritems()]
|
||||
if dest_val is None:
|
||||
dest_val = []
|
||||
elif not isinstance(dest_val, list):
|
||||
@ -717,6 +758,17 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
'Book title %s not processed')%mi.title,
|
||||
show=True)
|
||||
return
|
||||
# convert the colon-separated pair strings back into a dict, which
|
||||
# is what set_identifiers wants
|
||||
if dfm['is_csp']:
|
||||
dst_id_type = unicode(self.s_r_dst_ident.text())
|
||||
if dst_id_type:
|
||||
v = ''.join(val)
|
||||
ids = mi.get(dest)
|
||||
ids[dst_id_type] = v
|
||||
val = ids
|
||||
else:
|
||||
val = dict([(t.split(':')) for t in val])
|
||||
else:
|
||||
val = self.s_r_replace_mode_separator().join(val)
|
||||
if dest == 'title' and len(val) == 0:
|
||||
@ -961,11 +1013,13 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
query['search_field'] = unicode(self.search_field.currentText())
|
||||
query['search_mode'] = unicode(self.search_mode.currentText())
|
||||
query['s_r_template'] = unicode(self.s_r_template.text())
|
||||
query['s_r_src_ident'] = unicode(self.s_r_src_ident.currentText())
|
||||
query['search_for'] = unicode(self.search_for.text())
|
||||
query['case_sensitive'] = self.case_sensitive.isChecked()
|
||||
query['replace_with'] = unicode(self.replace_with.text())
|
||||
query['replace_func'] = unicode(self.replace_func.currentText())
|
||||
query['destination_field'] = unicode(self.destination_field.currentText())
|
||||
query['s_r_dst_ident'] = unicode(self.s_r_dst_ident.text())
|
||||
query['replace_mode'] = unicode(self.replace_mode.currentText())
|
||||
query['comma_separated'] = self.comma_separated.isChecked()
|
||||
query['results_count'] = self.results_count.value()
|
||||
@ -992,37 +1046,61 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
|
||||
self.s_r_reset_query_fields()
|
||||
return
|
||||
|
||||
def set_index(attr, txt):
|
||||
def set_text(attr, key):
|
||||
try:
|
||||
attr.setCurrentIndex(attr.findText(txt))
|
||||
attr.setText(item[key])
|
||||
except:
|
||||
pass
|
||||
|
||||
def set_checked(attr, key):
|
||||
try:
|
||||
attr.setChecked(item[key])
|
||||
except:
|
||||
attr.setChecked(False)
|
||||
|
||||
def set_value(attr, key):
|
||||
try:
|
||||
attr.setValue(int(item[key]))
|
||||
except:
|
||||
attr.setValue(0)
|
||||
|
||||
def set_index(attr, key):
|
||||
try:
|
||||
attr.setCurrentIndex(attr.findText(item[key]))
|
||||
except:
|
||||
attr.setCurrentIndex(0)
|
||||
|
||||
set_index(self.search_mode, item['search_mode'])
|
||||
set_index(self.search_field, item['search_field'])
|
||||
self.s_r_template.setText(item['s_r_template'])
|
||||
set_index(self.search_mode, 'search_mode')
|
||||
set_index(self.search_field, 'search_field')
|
||||
set_text(self.s_r_template, 's_r_template')
|
||||
|
||||
self.s_r_template_changed() #simulate gain/loss of focus
|
||||
self.search_for.setText(item['search_for'])
|
||||
self.case_sensitive.setChecked(item['case_sensitive'])
|
||||
self.replace_with.setText(item['replace_with'])
|
||||
set_index(self.replace_func, item['replace_func'])
|
||||
set_index(self.destination_field, item['destination_field'])
|
||||
set_index(self.replace_mode, item['replace_mode'])
|
||||
self.comma_separated.setChecked(item['comma_separated'])
|
||||
self.results_count.setValue(int(item['results_count']))
|
||||
self.starting_from.setValue(int(item['starting_from']))
|
||||
self.multiple_separator.setText(item['multiple_separator'])
|
||||
|
||||
set_index(self.s_r_src_ident, 's_r_src_ident');
|
||||
set_text(self.s_r_dst_ident, 's_r_dst_ident')
|
||||
set_text(self.search_for, 'search_for')
|
||||
set_checked(self.case_sensitive, 'case_sensitive')
|
||||
set_text(self.replace_with, 'replace_with')
|
||||
set_index(self.replace_func, 'replace_func')
|
||||
set_index(self.destination_field, 'destination_field')
|
||||
set_index(self.replace_mode, 'replace_mode')
|
||||
set_checked(self.comma_separated, 'comma_separated')
|
||||
set_value(self.results_count, 'results_count')
|
||||
set_value(self.starting_from, 'starting_from')
|
||||
set_text(self.multiple_separator, 'multiple_separator')
|
||||
|
||||
def s_r_reset_query_fields(self):
|
||||
# Don't reset the search mode. The user will probably want to use it
|
||||
# as it was
|
||||
self.search_field.setCurrentIndex(0)
|
||||
self.s_r_src_ident.setCurrentIndex(0)
|
||||
self.s_r_template.setText("")
|
||||
self.search_for.setText("")
|
||||
self.case_sensitive.setChecked(False)
|
||||
self.replace_with.setText("")
|
||||
self.replace_func.setCurrentIndex(0)
|
||||
self.destination_field.setCurrentIndex(0)
|
||||
self.s_r_dst_ident.setText('')
|
||||
self.replace_mode.setCurrentIndex(0)
|
||||
self.comma_separated.setChecked(True)
|
||||
self.results_count.setValue(999)
|
||||
|
@ -732,6 +732,29 @@ Future conversion of these books will use the default settings.</string>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item row="5" column="0">
|
||||
<widget class="QLabel" name="s_r_src_ident_label">
|
||||
<property name="text">
|
||||
<string>Identifier type:</string>
|
||||
</property>
|
||||
<property name="buddy">
|
||||
<cstring>s_r_src_ident</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="1">
|
||||
<widget class="QComboBox" name="s_r_src_ident">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||
<horstretch>100</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="toolTip">
|
||||
<string>Choose which identifier type to operate upon</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0">
|
||||
<widget class="QLabel" name="template_label">
|
||||
<property name="text">
|
||||
@ -910,7 +933,30 @@ not multiple and the destination field is multiple</string>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item row="9" column="1" colspan="2">
|
||||
<item row="9" column="0">
|
||||
<widget class="QLabel" name="s_r_dst_ident_label">
|
||||
<property name="text">
|
||||
<string>Identifier type:</string>
|
||||
</property>
|
||||
<property name="buddy">
|
||||
<cstring>s_r_dst_ident</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="9" column="1">
|
||||
<widget class="QLineEdit" name="s_r_dst_ident">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||
<horstretch>100</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="toolTip">
|
||||
<string>Choose which identifier type to operate upon</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="10" column="1" colspan="2">
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_21">
|
||||
<item>
|
||||
<spacer name="HSpacer_347">
|
||||
@ -996,7 +1042,7 @@ not multiple and the destination field is multiple</string>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item row="10" column="0" colspan="4">
|
||||
<item row="11" column="0" colspan="4">
|
||||
<widget class="QScrollArea" name="scrollArea11">
|
||||
<property name="frameShape">
|
||||
<enum>QFrame::NoFrame</enum>
|
||||
@ -1120,6 +1166,7 @@ not multiple and the destination field is multiple</string>
|
||||
<tabstop>remove_button</tabstop>
|
||||
<tabstop>search_field</tabstop>
|
||||
<tabstop>search_mode</tabstop>
|
||||
<tabstop>s_r_src_ident</tabstop>
|
||||
<tabstop>s_r_template</tabstop>
|
||||
<tabstop>search_for</tabstop>
|
||||
<tabstop>case_sensitive</tabstop>
|
||||
@ -1128,6 +1175,7 @@ not multiple and the destination field is multiple</string>
|
||||
<tabstop>destination_field</tabstop>
|
||||
<tabstop>replace_mode</tabstop>
|
||||
<tabstop>comma_separated</tabstop>
|
||||
<tabstop>s_r_dst_ident</tabstop>
|
||||
<tabstop>results_count</tabstop>
|
||||
<tabstop>starting_from</tabstop>
|
||||
<tabstop>multiple_separator</tabstop>
|
||||
|
@ -12,7 +12,7 @@ from threading import Thread
|
||||
|
||||
from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QDate, \
|
||||
QPixmap, QListWidgetItem, QDialog, pyqtSignal, QIcon, \
|
||||
QPushButton
|
||||
QPushButton, QKeySequence
|
||||
|
||||
from calibre.gui2 import error_dialog, file_icon_provider, dynamic, \
|
||||
choose_files, choose_images, ResizableDialog, \
|
||||
@ -472,17 +472,19 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
self.prev_button = QPushButton(QIcon(I('back.png')), _('Previous'),
|
||||
self)
|
||||
self.button_box.addButton(self.prev_button, self.button_box.ActionRole)
|
||||
tip = _('Save changes and edit the metadata of %s')%prev
|
||||
tip = (_('Save changes and edit the metadata of %s')+' [Alt+Left]')%prev
|
||||
self.prev_button.setToolTip(tip)
|
||||
self.prev_button.clicked.connect(partial(self.next_triggered,
|
||||
-1))
|
||||
self.prev_button.setShortcut(QKeySequence('Alt+Left'))
|
||||
if next_:
|
||||
self.next_button = QPushButton(QIcon(I('forward.png')), _('Next'),
|
||||
self)
|
||||
self.button_box.addButton(self.next_button, self.button_box.ActionRole)
|
||||
tip = _('Save changes and edit the metadata of %s')%next_
|
||||
tip = (_('Save changes and edit the metadata of %s')+' [Alt+Right]')%next_
|
||||
self.next_button.setToolTip(tip)
|
||||
self.next_button.clicked.connect(partial(self.next_triggered, 1))
|
||||
self.next_button.setShortcut(QKeySequence('Alt+Right'))
|
||||
|
||||
self.splitter.setStretchFactor(100, 1)
|
||||
self.read_state()
|
||||
|
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import time, os
|
||||
|
||||
from PyQt4.Qt import SIGNAL, QUrl, QAbstractListModel, Qt, \
|
||||
QVariant
|
||||
QVariant, QFont
|
||||
|
||||
from calibre.web.feeds.recipes import compile_recipe, custom_recipes
|
||||
from calibre.web.feeds.news import AutomaticNewsRecipe
|
||||
@ -83,6 +83,9 @@ class UserProfiles(ResizableDialog, Ui_Dialog):
|
||||
self._model = self.model = CustomRecipeModel(recipe_model)
|
||||
self.available_profiles.setModel(self._model)
|
||||
self.available_profiles.currentChanged = self.current_changed
|
||||
f = QFont()
|
||||
f.setStyleHint(f.Monospace)
|
||||
self.source_code.setFont(f)
|
||||
|
||||
self.connect(self.remove_feed_button, SIGNAL('clicked(bool)'),
|
||||
self.added_feeds.remove_selected_items)
|
||||
|
@ -410,11 +410,6 @@ p, li { white-space: pre-wrap; }
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="font">
|
||||
<font>
|
||||
<family>DejaVu Sans Mono</family>
|
||||
</font>
|
||||
</property>
|
||||
<property name="lineWrapMode">
|
||||
<enum>QTextEdit::NoWrap</enum>
|
||||
</property>
|
||||
|
325
src/calibre/gui2/dnd.py
Normal file
@ -0,0 +1,325 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import posixpath, os, urllib, re
|
||||
from urlparse import urlparse, urlunparse
|
||||
from threading import Thread
|
||||
from Queue import Queue, Empty
|
||||
|
||||
from PyQt4.Qt import QPixmap, Qt, QDialog, QLabel, QVBoxLayout, \
|
||||
QDialogButtonBox, QProgressBar, QTimer
|
||||
|
||||
from calibre.constants import DEBUG, iswindows
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre import browser, as_unicode, prints
|
||||
from calibre.gui2 import error_dialog
|
||||
|
||||
IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'gif', 'png', 'bmp']
|
||||
|
||||
class Worker(Thread): # {{{
    '''
    Daemon thread that retrieves ``url`` into the file ``fpath``.

    Every progress report made by the retrieval is put onto the queue ``rq``
    as a 3-tuple. If the retrieval fails, ``err`` is set to the error message
    and ``tb`` to the formatted traceback, otherwise both remain ``None``.
    '''

    def __init__(self, url, fpath, rq):
        Thread.__init__(self)
        self.daemon = True
        self.url = url
        self.fpath = fpath
        self.rq = rq
        self.err = None
        self.tb = None

    def run(self):
        try:
            browser().retrieve(self.url, self.fpath, self.callback)
        except Exception as e:
            # Record the failure instead of raising, the creator of this
            # thread inspects err/tb afterwards
            self.err = as_unicode(e)
            import traceback
            self.tb = traceback.format_exc()

    def callback(self, a, b, c):
        # Hand the three reported values over to the consumer of the queue
        report = (a, b, c)
        self.rq.put(report)
# }}}
|
||||
|
||||
class DownloadDialog(QDialog): # {{{
    '''
    Dialog that downloads ``url`` into a temporary file using a background
    :class:`Worker` thread, while showing a progress bar and a Cancel button.

    Call :meth:`start_download` to run the download. Afterwards ``fpath`` is
    the path of the temporary file, ``rejected`` is True if the dialog was
    cancelled and ``err`` is the error message of the worker (None if the
    worker recorded no error).
    '''

    def __init__(self, url, fname, parent):
        '''
        :param url: The URL to download
        :param fname: File name shown to the user, its extension is used as
                      the suffix of the temporary file
        :param parent: Parent widget of the dialog
        '''
        QDialog.__init__(self, parent)
        self.setWindowTitle(_('Download %s')%fname)
        self.l = QVBoxLayout(self)
        self.purl = urlparse(url)
        self.msg = QLabel(_('Downloading <b>%s</b> from %s')%(fname,
            self.purl.netloc))
        self.msg.setWordWrap(True)
        self.l.addWidget(self.msg)
        self.pb = QProgressBar(self)
        # minimum == maximum == 0 makes the bar a busy indicator until the
        # first progress report with a known total arrives
        self.pb.setMinimum(0)
        self.pb.setMaximum(0)
        self.l.addWidget(self.pb)
        self.bb = QDialogButtonBox(QDialogButtonBox.Cancel, Qt.Horizontal, self)
        self.l.addWidget(self.bb)
        self.bb.rejected.connect(self.reject)
        sz = self.sizeHint()
        self.resize(max(sz.width(), 400), sz.height())

        # Only the name of the temporary file is needed, the worker writes
        # to it by path
        fpath = PersistentTemporaryFile(os.path.splitext(fname)[1])
        fpath.close()
        self.fpath = fpath.name

        self.worker = Worker(url, self.fpath, Queue())
        self.rejected = False

    def reject(self):
        # Remember the cancellation so that update() stops polling
        self.rejected = True
        QDialog.reject(self)

    def start_download(self):
        '''
        Start the worker and show the dialog until the download finishes or
        is cancelled. An error dialog is shown if the worker recorded an
        error.
        '''
        self.worker.start()
        QTimer.singleShot(50, self.update)
        self.exec_()
        if self.worker.err is not None:
            error_dialog(self.parent(), _('Download failed'),
                _('Failed to download from %r with error: %s')%(
                    self.worker.url, self.worker.err),
                det_msg=self.worker.tb, show=True)

    def update(self):
        '''
        Timer callback: refresh the progress bar and close the dialog once
        the worker thread has finished. Re-arms itself every 50ms.
        '''
        if self.rejected:
            return

        # The worker can report many times between two ticks. Drain the
        # queue and display only the newest report, otherwise the bar lags
        # behind the real progress and stale reports pile up in the queue.
        progress = None
        while True:
            try:
                progress = self.worker.rq.get_nowait()
            except Empty:
                break
        if progress is not None:
            self.update_pb(progress)

        if not self.worker.is_alive():
            return self.accept()
        QTimer.singleShot(50, self.update)

    def update_pb(self, progress):
        '''
        Display ``progress``, a (transferred, block_size, total) tuple. A
        total of -1 switches the bar back to a busy indicator.
        '''
        transferred, block_size, total = progress
        if total == -1:
            self.pb.setMaximum(0)
            self.pb.setMinimum(0)
            self.pb.setValue(0)
        else:
            so_far = transferred * block_size
            # so_far can exceed total, never let the value overshoot the
            # maximum of the bar
            self.pb.setMaximum(max(total, so_far))
            self.pb.setValue(so_far)

    @property
    def err(self):
        # Error message recorded by the worker, None if there was none
        return self.worker.err

# }}}
|
||||
|
||||
def dnd_has_image(md):
    ''' Return the result of ``md.hasImage()`` for the mime data ``md``. '''
    has_image = md.hasImage()
    return has_image
|
||||
|
||||
def data_as_string(f, md):
    '''
    Return the data stored in the mime data ``md`` under the format ``f``.

    :param f: Name of the format whose data is wanted
    :param md: Object providing ``data(format)``
    :return: The data as a byte string. For formats whose name contains
             ``/x-moz`` the data is decoded as UTF-16 when that is possible.
    '''
    raw = bytes(md.data(f))
    if '/x-moz' in f:
        try:
            raw = raw.decode('utf-16')
        except UnicodeError:
            # Best effort: fall back to the undecoded bytes. Only decoding
            # failures are ignored, so KeyboardInterrupt and friends are not
            # swallowed here.
            pass
    return raw
|
||||
|
||||
def dnd_has_extension(md, extensions):
    '''
    Return True if the mime data ``md`` points to something whose file
    extension is one of ``extensions``. The Firefox specific data is checked
    first, then the extensions of the paths of the URLs in ``md``.

    When DEBUG is set, the formats present in ``md`` and the URLs are
    printed.
    '''
    if DEBUG:
        prints('Debugging DND event')
        for fmt in md.formats():
            fmt = unicode(fmt)
            prints(fmt, repr(data_as_string(fmt, md))[:300], '\n')
        print()
    if has_firefox_ext(md, extensions):
        return True
    if not md.hasUrls():
        return False

    urls = [unicode(qurl.toString()) for qurl in md.urls()]
    purls = [urlparse(url) for url in urls]
    if DEBUG:
        prints('URLS:', urls)
        prints('Paths:', [u2p(purl) for purl in purls])

    wanted = frozenset(extensions)
    for purl in purls:
        # Extension of the URL path, lower cased and without the dot
        ext = posixpath.splitext(purl.path)[1][1:].lower()
        if ext in wanted:
            return True
    return False
|
||||
|
||||
def u2p(url):
    '''
    Convert the parsed URL ``url`` into a filesystem path.

    The path component of the URL is used, with ``/`` replaced by the native
    separator (on windows a leading ``/`` is dropped first). If no file
    exists at that path, the URL-unquoted version of it is returned instead.
    '''
    path = url.path
    if iswindows and path.startswith('/'):
        path = path[1:]
    native = path.replace('/', os.sep)
    if not os.path.exists(native):
        # Try unquoting the URL
        return urllib.unquote(native)
    return native
|
||||
|
||||
def dnd_get_image(md, image_exts=IMAGE_EXTENSIONS):
    '''
    Get the image in the QMimeData object md.

    :param md: The QMimeData object to inspect
    :param image_exts: File extensions (lower case, without the dot) that
                       identify a file or URL as an image

    :return: None, None if no image is found
             QPixmap, None if an image is found, the pixmap is guaranteed not
             null
             url, filename if a URL that points to an image is found
    '''
    if dnd_has_image(md):
        for x in md.formats():
            x = unicode(x)
            if x.startswith('image/'):
                cdata = bytes(md.data(x))
                pmap = QPixmap()
                pmap.loadFromData(cdata)
                if not pmap.isNull():
                    return pmap, None
                # Only the first image format is tried
                break

    # No image, look for a URL pointing to an image
    if md.hasUrls():
        urls = [unicode(u.toString()) for u in
                md.urls()]
        purls = [urlparse(u) for u in urls]
        # First look for a local file
        images = [u2p(x) for x in purls if x.scheme in ('', 'file') and
                posixpath.splitext(urllib.unquote(x.path))[1][1:].lower() in
                image_exts]
        images = [x for x in images if os.path.exists(x)]
        p = QPixmap()
        for path in images:
            try:
                with open(path, 'rb') as f:
                    p.loadFromData(f.read())
            except Exception:
                # Best effort: a file that cannot be read is skipped. Unlike
                # a bare except this lets KeyboardInterrupt and SystemExit
                # through.
                continue
            if not p.isNull():
                return p, None

        # No local images, look for remote ones

        # First, see if this is from Firefox
        rurl, fname = get_firefox_rurl(md, image_exts)

        if rurl and fname:
            return rurl, fname
        # Look through all remaining URLs
        remote_urls = [x for x in purls if x.scheme in ('http', 'https',
            'ftp') and posixpath.splitext(x.path)[1][1:].lower() in image_exts]
        if remote_urls:
            # Only the first matching remote URL is used
            rurl = remote_urls[0]
            fname = posixpath.basename(urllib.unquote(rurl.path))
            return urlunparse(rurl), fname

    return None, None
|
||||
|
||||
def dnd_get_files(md, exts):
    '''
    Find the files referenced by the QMimeData object md whose extension is
    one of the extensions in exts.

    :return: None, None if no file is found
             [paths], None if local files are found
             [urls], [filenames] if URLs that point to files are found
    '''
    if not md.hasUrls():
        return None, None

    urls = [unicode(qurl.toString()) for qurl in md.urls()]
    purls = [urlparse(url) for url in urls]

    # Local files win over everything else
    candidates = []
    for purl in purls:
        if purl.scheme not in ('', 'file'):
            continue
        ext = posixpath.splitext(urllib.unquote(purl.path))[1][1:].lower()
        if ext in exts:
            candidates.append(u2p(purl))
    local_files = [path for path in candidates if os.path.exists(path)]
    if local_files:
        return local_files, None

    # Nothing local: a drop from Firefox is checked before the plain URLs
    rurl, fname = get_firefox_rurl(md, exts)
    if rurl and fname:
        return [rurl], [fname]

    # Look through all remaining URLs
    remote_urls = []
    for purl in purls:
        if purl.scheme in ('http', 'https', 'ftp') and \
                posixpath.splitext(purl.path)[1][1:].lower() in exts:
            remote_urls.append(purl)
    if not remote_urls:
        return None, None

    filenames = [posixpath.basename(urllib.unquote(purl.path)) for purl in
            remote_urls]
    return [urlunparse(purl) for purl in remote_urls], filenames
|
||||
|
||||
def _get_firefox_pair(md, exts, url, fname):
    '''
    Read a URL and a file name from the mime data ``md``.

    ``url`` and ``fname`` are the names of the formats in ``md`` that hold
    the URL and the file name, both stored as UTF-16 text.

    :return: url, filename if both are non empty and the extension of the
             file name is in ``exts``, otherwise None, None
    '''
    # Decode and drop any trailing NUL characters
    url = bytes(md.data(url)).decode('utf-16').rstrip('\x00')
    fname = bytes(md.data(fname)).decode('utf-16').rstrip('\x00')
    if not (url and fname):
        return None, None

    ext = posixpath.splitext(fname)[1][1:].lower()
    # Weird firefox bug on linux: map the shortened extensions back to the
    # real ones
    fixes = {'jpe':'jpg', 'epu':'epub', 'mob':'mobi'}
    ext = fixes.get(ext, ext)
    fname = os.path.splitext(fname)[0] + '.' + ext
    if DEBUG:
        prints('Firefox file promise:', url, fname)
    if ext in exts:
        return url, fname
    return None, None
|
||||
|
||||
|
||||
def get_firefox_rurl(md, exts):
    '''
    Look for a remote URL and file name in the browser specific formats of
    the mime data ``md``.

    The following sources are tried in order, stopping at the first one that
    yields a URL: the ``x-moz-file-promise`` formats, the ``text/x-moz-url``
    formats and finally ``_NETSCAPE_URL``. Errors while reading a source are
    ignored (the traceback is printed when DEBUG is set).

    :param md: The QMimeData object to inspect
    :param exts: Acceptable file extensions (lower case, without the dot)
    :return: url, filename if a file with an extension in ``exts`` is found,
             otherwise None, None
    '''
    formats = frozenset([unicode(x) for x in md.formats()])
    url = fname = None

    # (format holding the URL, format holding the file name)
    firefox_pairs = (
        ('application/x-moz-file-promise-url',
            'application/x-moz-file-promise-dest-filename'),
        ('text/x-moz-url-data', 'text/x-moz-url-desc'),
    )
    for url_fmt, fname_fmt in firefox_pairs:
        if url is None and url_fmt in formats and fname_fmt in formats:
            try:
                url, fname = _get_firefox_pair(md, exts, url_fmt, fname_fmt)
            except Exception:
                # Best effort, but do not swallow KeyboardInterrupt or
                # SystemExit as a bare except would
                if DEBUG:
                    import traceback
                    traceback.print_exc()

    if url is None and '_NETSCAPE_URL' in formats:
        try:
            raw = bytes(md.data('_NETSCAPE_URL'))
            raw = raw.decode('utf-8')
            lines = raw.splitlines()
            # First line is taken as the URL, the second as the file name,
            # unless the second line itself looks like a URL
            if len(lines) > 1 and re.match(r'[a-z]+://', lines[1]) is None:
                url, fname = lines[:2]
                ext = posixpath.splitext(fname)[1][1:].lower()
                if ext not in exts:
                    fname = url = None
        except Exception:
            if DEBUG:
                import traceback
                traceback.print_exc()
    if DEBUG:
        prints('Firefox rurl:', url, fname)
    return url, fname
|
||||
|
||||
def has_firefox_ext(md, exts):
    '''
    Return True if get_firefox_rurl() finds a URL in the mime data ``md``
    for one of the extensions in ``exts``.
    '''
    url = get_firefox_rurl(md, exts)[0]
    return bool(url)
|
||||
|
@ -44,13 +44,13 @@ class LibraryViewMixin(object): # {{{
|
||||
for view in (self.library_view, self.memory_view, self.card_a_view, self.card_b_view):
|
||||
getattr(view, func)(*args)
|
||||
|
||||
self.memory_view.connect_dirtied_signal(self.upload_booklists)
|
||||
self.memory_view.connect_dirtied_signal(self.upload_dirtied_booklists)
|
||||
self.memory_view.connect_upload_collections_signal(
|
||||
func=self.upload_collections, oncard=None)
|
||||
self.card_a_view.connect_dirtied_signal(self.upload_booklists)
|
||||
self.card_a_view.connect_dirtied_signal(self.upload_dirtied_booklists)
|
||||
self.card_a_view.connect_upload_collections_signal(
|
||||
func=self.upload_collections, oncard='carda')
|
||||
self.card_b_view.connect_dirtied_signal(self.upload_booklists)
|
||||
self.card_b_view.connect_dirtied_signal(self.upload_dirtied_booklists)
|
||||
self.card_b_view.connect_upload_collections_signal(
|
||||
func=self.upload_collections, oncard='cardb')
|
||||
self.book_on_device(None, reset=True)
|
||||
@ -264,6 +264,9 @@ class LayoutMixin(object): # {{{
|
||||
self.book_details.files_dropped.connect(self.iactions['Add Books'].files_dropped_on_book)
|
||||
self.book_details.cover_changed.connect(self.bd_cover_changed,
|
||||
type=Qt.QueuedConnection)
|
||||
self.book_details.remote_file_dropped.connect(
|
||||
self.iactions['Add Books'].remote_file_dropped_on_book,
|
||||
type=Qt.QueuedConnection)
|
||||
self.book_details.open_containing_folder.connect(self.iactions['View'].view_folder_for_id)
|
||||
self.book_details.view_specific_format.connect(self.iactions['View'].view_format_by_id)
|
||||
|
||||
|
@ -5,7 +5,6 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys
|
||||
from math import cos, sin, pi
|
||||
|
||||
from PyQt4.Qt import QColor, Qt, QModelIndex, QSize, \
|
||||
@ -245,13 +244,13 @@ class CcTextDelegate(QStyledItemDelegate): # {{{
|
||||
typ = m.custom_columns[col]['datatype']
|
||||
if typ == 'int':
|
||||
editor = QSpinBox(parent)
|
||||
editor.setRange(-100, sys.maxint)
|
||||
editor.setRange(-100, 100000000)
|
||||
editor.setSpecialValueText(_('Undefined'))
|
||||
editor.setSingleStep(1)
|
||||
elif typ == 'float':
|
||||
editor = QDoubleSpinBox(parent)
|
||||
editor.setSpecialValueText(_('Undefined'))
|
||||
editor.setRange(-100., float(sys.maxint))
|
||||
editor.setRange(-100., 100000000)
|
||||
editor.setDecimals(2)
|
||||
else:
|
||||
editor = MultiCompleteLineEdit(parent)
|
||||
|