[Sync] Sync with trunk. Revision 9312
@ -31,3 +31,4 @@ nbproject/
|
||||
.pydevproject
|
||||
.settings/
|
||||
*.DS_Store
|
||||
calibre_plugins/
|
143
Changelog.yaml
@ -19,6 +19,149 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.8.2
|
||||
date: 2011-05-20
|
||||
|
||||
new features:
|
||||
- title: "Various new ebook sources added to Get Books: Google Books, O'Reilly, archive.org, some Polish ebooks stores, etc."
|
||||
|
||||
- title: "Amazon metadata download: Allow user to configure Amazon plugin to use any of the US, UK, German, French and Italian Amazon websites"
|
||||
|
||||
- title: "When deleting large numbers of books, give the user the option to skip the Recycle Bin, since sending lots of files to the recycle bin can be very slow."
|
||||
tickets: [784987]
|
||||
|
||||
- title: "OS X: The unified title+toolbar was disabled as it had various bugs. If you really want it you can turn it on again via Preferences->Tweaks, but be aware that you will see problems like the calibre windowd being too wide, weird animations when a device is detected, etc."
|
||||
|
||||
- title: "Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name."
|
||||
|
||||
- title: "Get Books: Store last few searches in history"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix a crash when a device is connected/disconnected while a modal dialog opened from the toolbar is visible"
|
||||
tickets: [780484]
|
||||
|
||||
- title: "Fix incorrect results from ebooks.com when searching via Get Books"
|
||||
|
||||
- title: "Metadata plugboards: Add prioritization scheme to allow for using different settings for different locations"
|
||||
tickets: [783229]
|
||||
|
||||
- title: "Fix manage authors dialog too wide"
|
||||
tickets: [783065]
|
||||
|
||||
- title: "Fix multiple bracket types in author names not handled correctly when generating author sort string"
|
||||
tickets: [782551]
|
||||
|
||||
- title: "MOBI Input: Don't error out when detecting TOC structure if one of the elements has an invalid margin unit"
|
||||
|
||||
- title: "More fixes for japanese language calibre on windows"
|
||||
tickets: [782408]
|
||||
|
||||
- title: "Linux binaries: Always use either Cleanlook or Plastique styles for the GUI if no style can be loaded from the host computer"
|
||||
|
||||
improved recipes:
|
||||
- Newsweek
|
||||
- Economist
|
||||
- Dvhn
|
||||
- United Daily
|
||||
- Dagens Nyheter
|
||||
- GoComics
|
||||
- faz.net
|
||||
- golem.de
|
||||
|
||||
new recipes:
|
||||
- title: National Geographic
|
||||
author: gagsays
|
||||
|
||||
- title: Various German news sources
|
||||
author: schuster
|
||||
|
||||
- title: Dilema Veche
|
||||
author: Silviu Cotoara
|
||||
|
||||
- title: "Glamour, Good to Know, Good Housekeeping and Men's Health"
|
||||
author: Anonymous
|
||||
|
||||
- title: "Financial Sense and iProfessional"
|
||||
author: Darko Miletic
|
||||
|
||||
|
||||
- version: 0.8.1
|
||||
date: 2011-05-13
|
||||
|
||||
new features:
|
||||
- title: "Add Amazon DE, Beam EBooks, Beam DE, Weightless Books, Wizards Tower Books to the list of ebook stores searched by Get Books"
|
||||
|
||||
- title: "TXT output: All new Textile output with much greater preservation of formatting from the input document"
|
||||
|
||||
- title: "Migrate metadata plugin for Douban Books to the 0.8 API"
|
||||
|
||||
- title: "Driver for Dell Streak on windows"
|
||||
|
||||
- title: "Add menu items to Get Books action to search by title and author of current book"
|
||||
|
||||
- title: "Add title_sort as available field to CSV/XML catalogs"
|
||||
|
||||
- title: "Add a context menu to the manage authors dialog"
|
||||
|
||||
- title: "Add a button to paste isbn into the identifiers field in the edit metadata dialog automatically"
|
||||
|
||||
bug fixes:
|
||||
- title: "Amazon metadata download plugin: Fix links being stripped from comments. Also fix ratings/isbn not being parsed from kindle edition pages."
|
||||
tickets: [782012]
|
||||
|
||||
- title: "Fix one source of segfaults on shutdown in the linux binary builds."
|
||||
|
||||
- title: "Allow the use of condensed/expanded fonts as interface fonts"
|
||||
|
||||
- title: "EPUB Input: Ignore missing cover file when converting, instead of erroring out."
|
||||
tickets: [781848]
|
||||
|
||||
- title: "Fix custom identifier being erased by metadata download"
|
||||
tickets: [781759]
|
||||
|
||||
- title: "Fix regression that broke various things when using Japanese language calibre on windows"
|
||||
tickets: [780804]
|
||||
|
||||
- title: "RTF Input: Handle null color codes correctly"
|
||||
tickets: [780728]
|
||||
|
||||
- title: "ODT Input: Handle inline special styles defined on <text:span> tags."
|
||||
tickets: [780250]
|
||||
|
||||
- title: "Fix error when pressing next previous button with an empty search in the Plugins preferences"
|
||||
tickets: [781135]
|
||||
|
||||
- title: "Ignore 'Unknown' author when downloading metadata."
|
||||
tickets: [779348]
|
||||
|
||||
- title: "Fix timezone bug when setting dates in the edit metadata dialog"
|
||||
tickets: [779497]
|
||||
|
||||
- title: "Fix ebook-convert not recognizing output paths starting with .."
|
||||
tickets: [779322]
|
||||
|
||||
improved recipes:
|
||||
- "Strategy+Business"
|
||||
- Readers Digest
|
||||
- Ming Pao
|
||||
- Telepolis
|
||||
- Fronda
|
||||
- Rzeczpospolita
|
||||
|
||||
new recipes:
|
||||
- title: "Various Taiwanese news sources"
|
||||
author: Eddie Lau
|
||||
|
||||
- title: Replica Vedetelor, Ziua Veche
|
||||
author: Silviu Cotoara
|
||||
|
||||
- title: Welt der Physik
|
||||
author: schuster
|
||||
|
||||
- title: Korea Herald
|
||||
author: Seongkyoun Yoo
|
||||
|
||||
|
||||
- version: 0.8.0
|
||||
date: 2010-05-06
|
||||
|
||||
|
@ -93,7 +93,7 @@ class Arcamax(BasicNewsRecipe):
|
||||
for page in pages:
|
||||
page_soup = self.index_to_soup(url)
|
||||
if page_soup:
|
||||
title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
|
||||
title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
|
||||
page_url = url
|
||||
# orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
|
||||
prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
|
||||
@ -127,4 +127,3 @@ class Arcamax(BasicNewsRecipe):
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
|
46
recipes/bild_de.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
title = u'Bild.de'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 50
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
|
||||
# get cover from myspace
|
||||
cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'
|
||||
|
||||
# set what to fetch on the site
|
||||
remove_tags_before = dict(name = 'h2', attrs={'id':'cover'})
|
||||
remove_tags_after = dict(name ='div', attrs={'class':'back'})
|
||||
|
||||
# thanx to kiklop74 for code (see sticky thread -> Recipes - Re-usable code)
|
||||
# this one removes a lot of direct-link's
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
# remove the ad's
|
||||
filter_regexps = [r'.\.smartadserver\.com']
|
||||
def skip_ad_pages(self, soup):
|
||||
return None
|
||||
|
||||
#get the real url behind .feedsportal.com and fetch the artikels
|
||||
def get_article_url(self, article):
|
||||
return article.get('id', article.get('guid', None))
|
||||
|
||||
#list of the rss source from www.bild.de
|
||||
feeds = [(u'Überblick', u'http://rss.bild.de/bild.xml'),
|
||||
(u'News', u'http://rss.bild.de/bild-news.xml'),
|
||||
(u'Politik', u'http://rss.bild.de/bild-politik.xml'),
|
||||
(u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
|
||||
(u'Sport', u'http://rss.bild.de/bild-sport.xml'),
|
||||
(u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
|
||||
(u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml')
|
||||
]
|
33
recipes/borse_online.recipe
Normal file
@ -0,0 +1,33 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'Börse-online'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.dpv.de/images/1995/source.gif'
|
||||
masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
remove_tags_bevor = [dict(name='h3')]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
|
||||
remove_tags = [dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
|
||||
dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
|
||||
dict(name=['h2', 'Gesamtranking', 'h3',''])]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('.html#nv=rss', '.html?mode=print')
|
||||
|
||||
|
||||
|
||||
feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]
|
||||
|
61
recipes/capital_de.recipe
Normal file
@ -0,0 +1,61 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1305470859(BasicNewsRecipe):
|
||||
title = u'Capital.de'
|
||||
language = 'de'
|
||||
__author__ = 'schuster'
|
||||
oldest_article =7
|
||||
max_articles_per_feed = 35
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg'
|
||||
cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg'
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
def print_version(self, url):
|
||||
return url.replace ('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print')
|
||||
remove_tags_bevor = [dict(name='td', attrs={'class':'textcell'})]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'artikelsplit'})]
|
||||
|
||||
feeds = [ (u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'),
|
||||
(u'Unternehmen', u'http://www.capital.de/rss/unternehmen'),
|
||||
(u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage')]
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('div',attrs={'class':'artikelsplit'})
|
||||
if pager:
|
||||
nexturl = self.INDEX + pager.a['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
texttag = soup2.find('div', attrs={'class':'printable'})
|
||||
for it in texttag.findAll(style=True):
|
||||
del it['style']
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
texttag.extract()
|
||||
appendtag.insert(position,texttag)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('div', attrs={'class':'artikelsplit'}):
|
||||
item.extract()
|
||||
self.append_page(soup, soup.body, 3)
|
||||
pager = soup.find('div',attrs={'class':'artikelsplit'})
|
||||
if pager:
|
||||
pager.extract()
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
||||
|
||||
remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'artikelsplit']}),
|
||||
dict(id=['topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere']),
|
||||
dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
|
||||
dict(rel=['canonical'])]
|
||||
|
42
recipes/china_times.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
|
||||
title = u'中時電子報'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
|
||||
feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),
|
||||
(u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),
|
||||
(u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),
|
||||
(u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),
|
||||
(u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),
|
||||
(u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),
|
||||
(u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),
|
||||
(u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),
|
||||
(u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),
|
||||
(u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),
|
||||
#(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'),
|
||||
#(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links
|
||||
#(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links
|
||||
]
|
||||
|
||||
__author__ = 'einstuerzende, updated by Eddie Lau'
|
||||
__version__ = '1.0'
|
||||
language = 'zh'
|
||||
publisher = 'China Times Group'
|
||||
description = 'China Times (Taiwan)'
|
||||
category = 'News, Chinese, Taiwan'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
encoding = 'big5'
|
||||
conversion_options = {'linearize_tables':True}
|
||||
masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
|
||||
cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})]
|
||||
remove_tags = [dict(name='div', attrs={'class':['focus-news']})]
|
||||
|
34
recipes/cosmopolitan_de.recipe
Normal file
@ -0,0 +1,34 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
|
||||
title = u'Cosmopolitan.de'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 7
|
||||
language = 'de'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.cosmopolitan.com/cm/shared/site_images/print_this/cosmopolitan_logo.gif'
|
||||
remove_tags_before = dict(name = 'h1', attrs={'class':'artikel'})
|
||||
remove_tags_after = dict(name ='div', attrs={'class':'morePages'})
|
||||
extra_css = '''
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
||||
'''
|
||||
remove_tags = [ dict(id='strong'),
|
||||
dict(title='strong'),
|
||||
dict(name='span'),
|
||||
dict(name='li', attrs={'class':'large'}),
|
||||
dict(name='ul', attrs={'class':'articleImagesPortrait clearfix'}),
|
||||
dict(name='p', attrs={'class':'external'}),
|
||||
dict(name='a', attrs={'target':'_blank'}),]
|
||||
feeds = [ (u'Komplett', u'http://www.cosmopolitan.de/rss/allgemein.xml'),
|
||||
(u'Mode', u'http://www.cosmopolitan.de/rss/mode.xml'),
|
||||
(u'Beauty', u'http://www.cosmopolitan.de/rss/beauty.xml'),
|
||||
(u'Liebe&Sex', u'http://www.cosmopolitan.de/rss/liebe.xml'),
|
||||
(u'Psychologie', u'http://www.cosmopolitan.de/rss/psychologie.xml'),
|
||||
(u'Job&Karriere', u'http://www.cosmopolitan.de/rss/job.xml'),
|
||||
(u'Lifestyle', u'http://www.cosmopolitan.de/rss/lifestyle.xml'),
|
||||
(u'Shopping', u'http://www.cosmopolitan.de/rss/shopping.xml'),
|
||||
(u'Bildergalerien', u'http://www.cosmopolitan.de/rss/bildgalerien.xml')]
|
55
recipes/dilemaveche.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
dilemaveche.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DilemaVeche(BasicNewsRecipe):
|
||||
title = u'Dilema Veche'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'Sunt vechi, domnule!'
|
||||
publisher = u'Dilema Veche'
|
||||
oldest_article = 50
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'art_title'})
|
||||
, dict(name='h1', attrs={'class':'art_title online'})
|
||||
, dict(name='div', attrs={'class':'item'})
|
||||
, dict(name='div', attrs={'class':'art_content'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['article_details']})
|
||||
, dict(name='div', attrs={'class':['controale']})
|
||||
, dict(name='div', attrs={'class':['art_related_left']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':['article_details']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.dilemaveche.ro/rss.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
53
recipes/divahair.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
divahair.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DivaHair(BasicNewsRecipe):
|
||||
title = u'Diva Hair'
|
||||
language = 'ro'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'Coafuri, frizuri, tunsori ..'
|
||||
publisher = u'Diva Hair'
|
||||
category = u'Ziare,Stiri,Coafuri,Femei'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.divahair.ro/imgs/logo.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='td', attrs={'class':'spatiuart'})
|
||||
, dict(name='div', attrs={'class':'spatiuart'})
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'categorie'})
|
||||
, dict(name='div', attrs={'class':'gri gri2 detaliiart'})
|
||||
, dict(name='div', attrs={'class':'articol_box_bottom'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':'articol_box_bottom'})
|
||||
]
|
||||
|
||||
feeds = [ (u'\u0218tiri', u'http://www.divahair.ro/feed') ]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
@ -37,7 +37,7 @@ class DN_se(BasicNewsRecipe):
|
||||
,(u'Kultur' , u'http://www.dn.se/kultur-rss' )
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'article-content'})]
|
||||
remove_tags_before = dict(name='h1')
|
||||
remove_tags_after = dict(name='div',attrs={'id':'byline'})
|
||||
remove_tags = [
|
||||
@ -45,5 +45,5 @@ class DN_se(BasicNewsRecipe):
|
||||
,dict(name='div',attrs={'id':'hook'})
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,19 +1,21 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
|
||||
title = u'DvhN'
|
||||
oldest_article = 1
|
||||
__author__ = 'Reijndert'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 200
|
||||
|
||||
__author__ = 'Reijndert'
|
||||
no_stylesheets = True
|
||||
cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif'
|
||||
cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'
|
||||
language = 'nl'
|
||||
country = 'NL'
|
||||
version = 1
|
||||
publisher = u'Dagblad van het Noorden'
|
||||
category = u'Nieuws'
|
||||
description = u'Nieuws uit Noord Nederland'
|
||||
timefmt = ' %Y-%m-%d (%a)'
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
|
||||
@ -21,11 +23,26 @@ class AdvancedUserRecipe1302341394(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','iframe','base'])
|
||||
,dict(name='span',attrs={'class':'copyright'})
|
||||
dict(name='span',attrs={'class':'location'})
|
||||
]
|
||||
|
||||
feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<a.*?>'), lambda h1: '')
|
||||
,(re.compile(r'</a>'), lambda h2: '')
|
||||
,(re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: '')
|
||||
,(re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: '')
|
||||
]
|
||||
|
||||
|
||||
feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss')
|
||||
, (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss')
|
||||
, (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss')
|
||||
, (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss')
|
||||
, (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss')
|
||||
, (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss')
|
||||
, (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss')
|
||||
, (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
|
||||
|
@ -20,7 +20,7 @@ class Economist(BasicNewsRecipe):
|
||||
INDEX = 'http://www.economist.com/printedition'
|
||||
description = ('Global news and current affairs from a European'
|
||||
' perspective. Best downloaded on Friday mornings (GMT)')
|
||||
|
||||
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
|
||||
oldest_article = 7.0
|
||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
|
@ -14,7 +14,7 @@ class Economist(BasicNewsRecipe):
|
||||
description = ('Global news and current affairs from a European'
|
||||
' perspective. Best downloaded on Friday mornings (GMT).'
|
||||
' Much slower than the print edition based version.')
|
||||
|
||||
extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
|
||||
oldest_article = 7.0
|
||||
cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
|
||||
remove_tags = [
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
elmundo.es
|
||||
'''
|
||||
@ -10,15 +10,24 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class ElMundo(BasicNewsRecipe):
|
||||
title = 'El Mundo'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Spain'
|
||||
publisher = 'El Mundo'
|
||||
description = 'Lider de informacion en espaniol'
|
||||
publisher = 'Unidad Editorial Informacion General S.L.U.'
|
||||
category = 'news, politics, Spain'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'iso8859_15'
|
||||
language = 'es'
|
||||
language = 'es_ES'
|
||||
masthead_url = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
|
||||
publication_type = 'newspaper'
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif}
|
||||
.metadata_noticia{font-size: small}
|
||||
h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
|
||||
.hora{color: red}
|
||||
.update{color: gray}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
@ -30,22 +39,31 @@ class ElMundo(BasicNewsRecipe):
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
|
||||
remove_tags_before = dict(attrs={'class':['titular','antetitulo'] })
|
||||
remove_tags_after = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
|
||||
|
||||
remove_attributes = ['lang','border']
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
|
||||
,dict(name='div', attrs={'id':'modulo_multimedia' })
|
||||
,dict(name='ul', attrs={'class':'herramientas' })
|
||||
,dict(name=['object','link'])
|
||||
,dict(name=['object','link','embed','iframe','base','meta'])
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
|
||||
,(u'Deportes' , u'http://rss.elmundo.es/rss/descarga.htm?data2=14')
|
||||
,(u'Economia' , u'http://rss.elmundo.es/rss/descarga.htm?data2=7' )
|
||||
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
|
||||
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
|
||||
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
|
||||
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
|
||||
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
|
||||
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
|
||||
(u'Portada' , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml' )
|
||||
,(u'Deportes' , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
|
||||
,(u'Economia' , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' )
|
||||
,(u'Espana' , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' )
|
||||
,(u'Internacional' , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' )
|
||||
,(u'Cultura' , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml' )
|
||||
,(u'Ciencia/Ecologia', u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' )
|
||||
,(u'Comunicacion' , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
|
||||
,(u'Television' , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
|
||||
|
74
recipes/express_de.recipe
Normal file
@ -0,0 +1,74 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'Express.de'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 50
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
extra_css = '''
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
||||
|
||||
'''
|
||||
remove_javascript = True
|
||||
remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'MoreNews'})]
|
||||
|
||||
remove_tags = [dict(id='kalaydo'),
|
||||
dict(id='Header'),
|
||||
dict(id='Searchline'),
|
||||
dict(id='MainNav'),
|
||||
dict(id='Logo'),
|
||||
dict(id='MainLinkSpacer'),
|
||||
dict(id='MainLinks'),
|
||||
dict(title='Diese Seite Bookmarken'),
|
||||
|
||||
dict(name='span'),
|
||||
dict(name='div', attrs={'class':'spacer_leftneu'}),
|
||||
dict(name='div', attrs={'class':'button kalaydologo'}),
|
||||
dict(name='div', attrs={'class':'button stellenneu'}),
|
||||
dict(name='div', attrs={'class':'button autoneu'}),
|
||||
dict(name='div', attrs={'class':'button immobilienneu'}),
|
||||
dict(name='div', attrs={'class':'button kleinanzeigen'}),
|
||||
dict(name='div', attrs={'class':'button tiereneu'}),
|
||||
dict(name='div', attrs={'class':'button ferienwohnungen'}),
|
||||
dict(name='div', attrs={'class':'button inserierenneu'}),
|
||||
dict(name='div', attrs={'class':'spacer_rightneu'}),
|
||||
dict(name='div', attrs={'class':'spacer_rightcorner'}),
|
||||
dict(name='div', attrs={'class':'HeaderMetaNav'}),
|
||||
dict(name='div', attrs={'class':'HeaderSearchOption'}),
|
||||
dict(name='div', attrs={'class':'HeaderSearch'}),
|
||||
dict(name='div', attrs={'class':'sbutton'}),
|
||||
dict(name='div', attrs={'class':'active'}),
|
||||
|
||||
]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
feeds = [(u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
|
||||
(u'Regional - Köln', u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
|
||||
(u'Regional - Bonn', u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
|
||||
(u'Regional - Düsseldorf', u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
|
||||
(u'Regional - Region', u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
|
||||
(u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
|
||||
(u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
|
||||
(u'1.FC Köln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
|
||||
(u'Alemannia Aachen News', u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
|
||||
(u'Borussia M~Gladbach', u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
|
||||
(u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
|
||||
(u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
|
||||
(u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
|
||||
|
||||
|
||||
|
||||
]
|
@ -1,51 +1,38 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
|
||||
'''
|
||||
Profile to download FAZ.net
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FazNet(BasicNewsRecipe):
|
||||
title = 'FAZ NET'
|
||||
__author__ = 'Kovid Goyal, Darko Miletic'
|
||||
title = u'Faz.net'
|
||||
__author__ = 'schuster'
|
||||
remove_tags = [dict(attrs={'class':['right', 'ArrowLinkRight', 'ModulVerlagsInfo', 'left', 'Head']}),
|
||||
dict(id=['BreadCrumbs', 'tstag', 'FazFooterPrint']),
|
||||
dict(name=['script', 'noscript', 'style'])]
|
||||
oldest_article = 2
|
||||
description = 'Frankfurter Allgemeine Zeitung'
|
||||
publisher = 'FAZ Electronic Media GmbH'
|
||||
category = 'news, politics, Germany'
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
|
||||
max_articles_per_feed = 30
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','embed','base'])
|
||||
,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
|
||||
]
|
||||
|
||||
|
||||
feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.faz.net/f30/Images/Logos/logo.gif'
|
||||
|
||||
def print_version(self, url):
|
||||
article, sep, rest = url.partition('?')
|
||||
return article.replace('.html', '~Afor~Eprint.html')
|
||||
return url.replace('.html', '~Afor~Eprint.html')
|
||||
|
||||
|
||||
|
||||
feeds = [(u'Politik', u'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Wirtschaft', u'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Feuilleton', u'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Sport', u'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Gesellschaft', u'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Finanzen', u'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Wissen', u'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Reise', u'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Technik & Motor', u'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Beruf & Chance', u'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Kunstmarkt', u'http://www.faz.net/s/RubBC09F7BF72A2405A96718ECBFB68FBFE/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Immobilien ', u'http://www.faz.net/s/RubFED172A9E10F46B3A5F01B02098C0C8D/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Rhein-Main Zeitung', u'http://www.faz.net/s/RubABE881A6669742C2A5EBCB5D50D7EBEE/Tpl~Epartner~SRss_.xml'),
|
||||
(u'Atomdebatte ', u'http://www.faz.net/s/Rub469C43057F8C437CACC2DE9ED41B7950/Tpl~Epartner~SRss_.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
del soup.body['onload']
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
64
recipes/financialsense.recipe
Normal file
@ -0,0 +1,64 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.financialsense.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FinancialSense(BasicNewsRecipe):
|
||||
title = 'Financial Sense'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Uncommon News & Views for the Wise Investor'
|
||||
publisher = 'Financial Sense'
|
||||
category = 'news, finances, politics, USA'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'http://www.financialsense.com/sites/default/files/logo.jpg'
|
||||
extra_css = """
|
||||
body{font-family: Arial,"Helvetica Neue",Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
h2{color: gray}
|
||||
.name{margin-right: 5em}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags =[dict(name=['meta','link','base','object','embed','iframe'])]
|
||||
remove_tags_after=dict(attrs={'class':'vcard'})
|
||||
keep_only_tags =[dict(attrs={'class':['title','post-meta','content','item-title','vcard']})]
|
||||
remove_attributes=['lang','type']
|
||||
|
||||
|
||||
feeds = [(u'Articles', u'http://feeds.feedburner.com/fso')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('a'):
|
||||
limg = item.find('img')
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
if limg:
|
||||
item.name = 'div'
|
||||
item.attrs = []
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
48
recipes/focus_de.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
|
||||
title = u'Focus (DE)'
|
||||
__author__ = 'Anonymous'
|
||||
language = 'de'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?drucken=1'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':['article']}) ]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':'sidebar'}),
|
||||
dict(name='div', attrs={'class':'commentForm'}),
|
||||
dict(name='div', attrs={'class':'comment clearfix oid-3534591 open'}),
|
||||
dict(name='div', attrs={'class':'similarityBlock'}),
|
||||
dict(name='div', attrs={'class':'footer'}),
|
||||
dict(name='div', attrs={'class':'getMoreComments'}),
|
||||
dict(name='div', attrs={'class':'moreComments'}),
|
||||
dict(name='div', attrs={'class':'ads'}),
|
||||
dict(name='div', attrs={'class':'articleContent'}),
|
||||
|
||||
|
||||
]
|
||||
remove_tags_after = [
|
||||
dict(name='div',attrs={'class':['commentForm','title', 'actions clearfix']})
|
||||
]
|
||||
|
||||
|
||||
feeds = [ (u'Eilmeldungen', u'http://rss2.focus.de/c/32191/f/533875/index.rss'),
|
||||
(u'Auto-News', u'http://rss2.focus.de/c/32191/f/443320/index.rss'),
|
||||
(u'Digital-News', u'http://rss2.focus.de/c/32191/f/443315/index.rss'),
|
||||
(u'Finanzen-News', u'http://rss2.focus.de/c/32191/f/443317/index.rss'),
|
||||
(u'Gesundheit-News', u'http://rss2.focus.de/c/32191/f/443314/index.rss'),
|
||||
(u'Immobilien-News', u'http://rss2.focus.de/c/32191/f/443318/index.rss'),
|
||||
(u'Kultur-News', u'http://rss2.focus.de/c/32191/f/443321/index.rss'),
|
||||
(u'Panorama-News', u'http://rss2.focus.de/c/32191/f/533877/index.rss'),
|
||||
(u'Politik-News', u'http://rss2.focus.de/c/32191/f/443313/index.rss'),
|
||||
(u'Reisen-News', u'http://rss2.focus.de/c/32191/f/443316/index.rss'),
|
||||
(u'Sport-News', u'http://rss2.focus.de/c/32191/f/443319/index.rss'),
|
||||
(u'Wissen-News', u'http://rss2.focus.de/c/32191/f/533876/index.rss'),
|
||||
]
|
@ -21,14 +21,19 @@ class Fronda(BasicNewsRecipe):
|
||||
|
||||
feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')]
|
||||
|
||||
keep_only_tags = [dict(name='h1', attrs={'class':'big'}),
|
||||
dict(name='ul', attrs={'class':'about clear'}),
|
||||
dict(name='div', attrs={'class':'content'})]
|
||||
keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}),
|
||||
dict(name='div', attrs={'class':'naglowek_tresc'}),
|
||||
dict(name='div', attrs={'id':'czytaj'}) ]
|
||||
|
||||
remove_tags = [dict(name='a', attrs={'class':'print'})]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[ (r'<a href="#" class="print">Drukuj</a>', lambda match: ''),
|
||||
(r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
|
||||
[ (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
|
||||
(r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
|
||||
(r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
|
||||
(r'<h3>Zobacz t.*?</div>', lambda match: '</div>') ]
|
||||
(r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
|
||||
(r'<p[^>]*> </p>', lambda match: ''),
|
||||
(r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
|
||||
(r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
|
||||
]
|
||||
|
38
recipes/glamour.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
|
||||
title = u'Glamour (US)'
|
||||
oldest_article = 21
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
remove_javascript = True
|
||||
__author__ = 'Anonymous'
|
||||
remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?printable=true'
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
feeds = [ (u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'),
|
||||
(u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'),
|
||||
(u'All Sex, Love & Life', u'http://feeds.glamour.com/glamour/sex_love_life'),
|
||||
(u'All Health & Fitness', u'http://feeds.glamour.com/glamour/health_fitness'),
|
||||
(u'Shopping', u'http://feeds.glamour.com/glamour/shopping'),
|
||||
(u'Slaves to Fashion blog', u'http://feeds.glamour.com/glamour/slavestofashion'),
|
||||
(u'The Girls in the Beauty Department', u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
|
||||
(u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'),
|
||||
(u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
|
||||
(u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'),
|
||||
(u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
|
||||
(u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'),
|
||||
(u'Margarita Shapes Up blog', u'http://feeds.glamour.com/glamour/margaritashapesup'),
|
||||
(u'Little Miss Fortune blog', u'http://feeds.glamour.com/glamour/little-miss-fortune'),
|
||||
]
|
@ -6,13 +6,13 @@ __copyright__ = 'Copyright 2010 Starson17'
|
||||
www.gocomics.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import mechanize
|
||||
import mechanize, re
|
||||
|
||||
class GoComics(BasicNewsRecipe):
|
||||
title = 'GoComics'
|
||||
__author__ = 'Starson17'
|
||||
__version__ = '1.03'
|
||||
__date__ = '09 October 2010'
|
||||
__version__ = '1.05'
|
||||
__date__ = '19 may 2011'
|
||||
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
||||
category = 'news, comics'
|
||||
language = 'en'
|
||||
@ -20,6 +20,7 @@ class GoComics(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
cover_url = 'http://paulbuckley14059.files.wordpress.com/2008/06/calvin-and-hobbes.jpg'
|
||||
remove_attributes = ['style']
|
||||
|
||||
####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
|
||||
# num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
|
||||
@ -40,6 +41,8 @@ class GoComics(BasicNewsRecipe):
|
||||
|
||||
remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
|
||||
dict(name='div', attrs={'class':['tag-wrapper']}),
|
||||
dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
|
||||
dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
|
||||
dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
|
||||
]
|
||||
|
||||
|
@ -1,83 +1,70 @@
|
||||
#!/usr/bin/env python
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class golem_ger(BasicNewsRecipe):
|
||||
title = u'Golem.de'
|
||||
language = 'de'
|
||||
__author__ = 'Kovid Goyal'
|
||||
__author__ = 'schuster'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = 'de'
|
||||
lang = 'de-DE'
|
||||
no_stylesheets = True
|
||||
encoding = 'iso-8859-1'
|
||||
recursions = 1
|
||||
match_regexps = [r'http://www.golem.de/.*.html']
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'artikelhead'}),
|
||||
dict(name='p', attrs={'class':'teaser'}),
|
||||
dict(name='div', attrs={'class':'artikeltext'}),
|
||||
dict(name='h2', attrs={'id':'artikelhead'}),
|
||||
]
|
||||
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
|
||||
dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
|
||||
dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
|
||||
dict(name='td', attrs={'class':['xsmall']}),
|
||||
]
|
||||
|
||||
|
||||
# remove_tags_after = [
|
||||
# dict(name='div', attrs={'id':['contentad2']})
|
||||
# ]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
|
||||
(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
|
||||
(u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
|
||||
(u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
|
||||
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
|
||||
(u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
|
||||
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
|
||||
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
|
||||
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
|
||||
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
|
||||
(u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
|
||||
(u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
|
||||
(u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
|
||||
(u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
|
||||
(u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
|
||||
(u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
|
||||
(u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
|
||||
(u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0')
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
|
||||
(u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=feed=RSS2.0'),
|
||||
(u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
|
||||
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
|
||||
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
|
||||
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
|
||||
]
|
||||
|
||||
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
cover_url = 'http://www.e-energy.de/images/logo_golem.jpg'
|
||||
masthead_url = 'http://www.golem.de/staticrl/images/logo.png'
|
||||
extra_css = '''
|
||||
h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
|
||||
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
|
||||
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
|
||||
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
|
||||
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
|
||||
.teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
|
||||
.xsmall{font-style:italic;font-size:x-small;}
|
||||
.td{font-style:italic;font-size:x-small;}
|
||||
img {align:left;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
|
||||
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
||||
|
||||
'''
|
||||
remove_javascript = True
|
||||
remove_tags_befor = [dict(name='header', attrs={'class':'cluster-header'})]
|
||||
remove_tags_after = [dict(name='p', attrs={'class':'meta'})]
|
||||
remove_tags = [dict(rel='nofollow'),
|
||||
dict(name='header', attrs={'id':'header'}),
|
||||
dict(name='div', attrs={'class':'dh1'}),
|
||||
dict(name='label', attrs={'class':'implied'}),
|
||||
dict(name='section', attrs={'id':'comments'}),
|
||||
dict(name='li', attrs={'class':'gg_prebackcounterItem'}),
|
||||
dict(name='li', attrs={'class':'gg_prebackcounterItem gg_embeddedIndexCounter'}),
|
||||
dict(name='img', attrs={'class':'gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer'}),
|
||||
dict(name='div', attrs={'target':'_blank'})
|
||||
]
|
||||
|
||||
def get_browser(self, *args, **kwargs):
|
||||
from calibre import browser
|
||||
kwargs['user_agent'] = 'mozilla'
|
||||
return browser(*args, **kwargs)
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('id', article.get('guid', None))
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
feeds = [(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
|
||||
(u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
|
||||
(u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
|
||||
(u'Handy', u'http://rss.golem.de/rss.php?tp=handy&feed=RSS2.0'),
|
||||
(u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS2.0'),
|
||||
(u'Mobile', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
|
||||
(u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
|
||||
(u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
|
||||
(u'Security', u'http://rss.golem.de/rss.php?tp=sec&feed=RSS2.0'),
|
||||
(u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
|
||||
(u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
|
||||
(u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
|
||||
(u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
|
||||
(u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
|
||||
(u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
|
||||
(u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
|
||||
(u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
|
||||
(u'Wirtschaft', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0'),
|
||||
(u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0')
|
||||
|
||||
]
|
||||
|
||||
|
31
recipes/good_house_keeping.recipe
Normal file
@ -0,0 +1,31 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
|
||||
title = u'Good House Keeping'
|
||||
language = 'en'
|
||||
__author__ = 'Anonymous'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
|
||||
def print_version(self,url):
|
||||
segments = url.split('/')
|
||||
printURL = '/'.join(segments[0:3]) + '/print-this/' + '/'.join(segments[4:])
|
||||
return printURL
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
feeds = [ (u'Recipes & Entertaining', u'http://www.goodhousekeeping.com/food/food-rss/?src=rss'),
|
||||
(u'Home & House', u'http://www.goodhousekeeping.com/home/home-rss/?src=rss'),
|
||||
(u'Diet & Health', u'http://www.goodhousekeeping.com/health/health-rss/?src=rss'),
|
||||
(u'Beauty & Style', u'http://www.goodhousekeeping.com/beauty/beauty-rss/?src=rss'),
|
||||
(u'Family & Pets', u'http://www.goodhousekeeping.com/family/family-rss/?src=rss'),
|
||||
(u'Saving Money', u'http://www.goodhousekeeping.com/money/money-rss/?src=rss'),
|
||||
]
|
32
recipes/good_to_know.recipe
Normal file
@ -0,0 +1,32 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1305547242(BasicNewsRecipe):
|
||||
title = u'Good to Know (uk)'
|
||||
oldest_article = 14
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
__author__ = 'Anonymous'
|
||||
language = 'en_GB'
|
||||
remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '/print/1'
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
feeds = [ (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
|
||||
(u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
|
||||
(u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
|
||||
(u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
|
||||
(u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
|
||||
(u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
|
||||
(u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
|
||||
(u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'),
|
||||
]
|
36
recipes/grrm.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
grrm.livejournal.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NotABlog(BasicNewsRecipe):
|
||||
title = 'Not A Blog - George R.R. Martin'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'George R.R. Martin'
|
||||
oldest_article = 15
|
||||
max_articles_per_feed = 100
|
||||
language = 'en'
|
||||
encoding = 'utf-8'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
publication_type = 'blog'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : 'sf, fantasy, game of thrones'
|
||||
, 'publisher': 'George R.R. Martin'
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return self.adeify_images(soup)
|
||||
|
||||
|
BIN
recipes/icons/dilemaveche.png
Normal file
After Width: | Height: | Size: 558 B |
BIN
recipes/icons/divahair.png
Normal file
After Width: | Height: | Size: 675 B |
Before Width: | Height: | Size: 550 B After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/financialsense.png
Normal file
After Width: | Height: | Size: 702 B |
BIN
recipes/icons/iprofesional.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/marca.png
Normal file
After Width: | Height: | Size: 293 B |
BIN
recipes/icons/mayra.png
Normal file
After Width: | Height: | Size: 620 B |
BIN
recipes/icons/moldovaazi.png
Normal file
After Width: | Height: | Size: 243 B |
BIN
recipes/icons/natgeo.png
Normal file
After Width: | Height: | Size: 247 B |
BIN
recipes/icons/newsmoldova.png
Normal file
After Width: | Height: | Size: 837 B |
BIN
recipes/icons/osnews_pl.png
Normal file
After Width: | Height: | Size: 1006 B |
BIN
recipes/icons/replicavedetelor.png
Normal file
After Width: | Height: | Size: 709 B |
BIN
recipes/icons/rmf24_opinie.png
Normal file
After Width: | Height: | Size: 722 B |
BIN
recipes/icons/rzeczpospolita.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/swiatkindle.png
Normal file
After Width: | Height: | Size: 425 B |
BIN
recipes/icons/the_nation.png
Normal file
After Width: | Height: | Size: 925 B |
BIN
recipes/icons/wash_post.png
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
recipes/icons/ziuaveche.png
Normal file
After Width: | Height: | Size: 554 B |
32
recipes/impulse_de.recipe
Normal file
@ -0,0 +1,32 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1305470859(BasicNewsRecipe):
|
||||
title = u'Impulse.de'
|
||||
language = 'de'
|
||||
__author__ = 'schuster'
|
||||
oldest_article =14
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg'
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
def print_version(self, url):
|
||||
return url.replace ('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print')
|
||||
remove_tags_bevor = [dict(name='h1', attrs={'class':'h2'})]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
|
||||
|
||||
feeds = [ (u'impulstest', u'http://www.impulse.de/rss/')]
|
||||
|
||||
|
||||
remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'kasten_artikel']}),
|
||||
dict(id=['metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia', 'rating_open']),
|
||||
dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
|
||||
dict(rel=['canonical'])]
|
||||
|
79
recipes/iprofesional.recipe
Normal file
@ -0,0 +1,79 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.iprofesional.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class iProfesional(BasicNewsRecipe):
|
||||
title = 'iProfesional.com'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Las ultimas noticias sobre profesionales'
|
||||
publisher = 'Emprendimientos Corporativos S.A.'
|
||||
category = 'news, IT, impuestos, negocios, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'es_AR'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'nesportal'
|
||||
masthead_url = 'http://www.iprofesional.com/img/logo-iprofesional.png'
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
.titulo-interior{font-family: Georgia,"Times New Roman",Times,serif}
|
||||
.autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':['fecha','interior-nota']})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','base','embed','object','iframe'])
|
||||
,dict(attrs={'class':['menu-imprimir','guardarNota','IN-widget','fin','permalink']})
|
||||
]
|
||||
remove_attributes=['lang','xmlns:og','xmlns:fb']
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Ultimas noticias' , u'http://feeds.feedburner.com/iprofesional-principales-noticias')
|
||||
,(u'Finanzas' , u'http://feeds.feedburner.com/iprofesional-finanzas' )
|
||||
,(u'Impuestos' , u'http://feeds.feedburner.com/iprofesional-impuestos' )
|
||||
,(u'Negocios' , u'http://feeds.feedburner.com/iprofesional-economia' )
|
||||
,(u'Comercio Exterior' , u'http://feeds.feedburner.com/iprofesional-comercio-exterior' )
|
||||
,(u'Tecnologia' , u'http://feeds.feedburner.com/iprofesional-tecnologia' )
|
||||
,(u'Management' , u'http://feeds.feedburner.com/iprofesional-managment' )
|
||||
,(u'Marketing' , u'http://feeds.feedburner.com/iprofesional-marketing' )
|
||||
,(u'Legales' , u'http://feeds.feedburner.com/iprofesional-legales' )
|
||||
,(u'Autos' , u'http://feeds.feedburner.com/iprofesional-autos' )
|
||||
,(u'Vinos' , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('a'):
|
||||
limg = item.find('img')
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
if limg:
|
||||
item.name = 'div'
|
||||
item.attrs = []
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
@ -3,8 +3,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1295262156(BasicNewsRecipe):
|
||||
title = u'kath.net'
|
||||
__author__ = 'Bobus'
|
||||
description = u'Katholische Nachrichten'
|
||||
oldest_article = 7
|
||||
language = 'en'
|
||||
language = 'de'
|
||||
max_articles_per_feed = 100
|
||||
|
||||
feeds = [(u'kath.net', u'http://www.kath.net/2005/xml/index.xml')]
|
||||
|
44
recipes/liberty_times.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
|
||||
title = u'自由電子報'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
|
||||
feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),
|
||||
(u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),
|
||||
(u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),
|
||||
(u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),
|
||||
(u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),
|
||||
(u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),
|
||||
(u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),
|
||||
(u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),
|
||||
(u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),
|
||||
(u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),
|
||||
(u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),
|
||||
(u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),
|
||||
(u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),
|
||||
(u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),
|
||||
(u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),
|
||||
]
|
||||
extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
|
||||
__author__ = 'einstuerzende, updated by Eddie Lau'
|
||||
__version__ = '1.1'
|
||||
language = 'zh'
|
||||
publisher = 'Liberty Times Group'
|
||||
description = 'Liberty Times (Taiwan)'
|
||||
category = 'News, Chinese, Taiwan'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
encoding = 'big5'
|
||||
conversion_options = {'linearize_tables':True}
|
||||
masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
|
||||
cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
|
||||
keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})]
|
||||
|
@ -1,14 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.marca.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class Marca(BasicNewsRecipe):
|
||||
title = 'Marca'
|
||||
@ -22,35 +19,30 @@ class Marca(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
delay = 1
|
||||
encoding = 'iso-8859-15'
|
||||
language = 'es'
|
||||
language = 'es_ES'
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'http://estaticos.marca.com/deporte/img/v3.0/img_marca-com.png'
|
||||
extra_css = """
|
||||
body{font-family: Tahoma,Geneva,sans-serif}
|
||||
h1,h2,h3,h4,h5,h6{font-family: 'LatoBlack',Tahoma,Geneva,sans-serif}
|
||||
.cab_articulo h4 {font-family: Georgia,"Times New Roman",Times,serif}
|
||||
.antetitulo{text-transform: uppercase}
|
||||
"""
|
||||
|
||||
direction = 'ltr'
|
||||
feeds = [(u'Portada', u'http://estaticos.marca.com/rss/portada.xml')]
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
feeds = [(u'Portada', u'http://rss.marca.com/rss/descarga.htm?data2=425')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','col_izq']})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script'])
|
||||
,dict(name='div', attrs={'class':['colC','peu']})
|
||||
,dict(name='div', attrs={'class':['utilidades estirar','bloque_int_corr estirar']})
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','cuerpo_articulo']})]
|
||||
remove_attributes = ['lang']
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script','embed','iframe','meta','base'])
|
||||
,dict(name='div', attrs={'class':'tabs'})
|
||||
]
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'bloque_int_corr estirar'})]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['dir' ] = self.direction
|
||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||
soup.head.insert(0,mcharset)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
|
||||
|
22
recipes/max_planck.recipe
Normal file
@ -0,0 +1,22 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'Max-Planck-Inst.'
|
||||
__author__ = 'schuster'
|
||||
remove_tags = [dict(attrs={'class':['clearfix', 'lens', 'col2_box_list', 'col2_box_teaser group_ext no_print', 'dotted_line', 'col2_box_teaser', 'box_image small', 'bold', 'col2_box_teaser no_print', 'print_kontakt']}),
|
||||
dict(id=['ie_clearing', 'col2', 'col2_content']),
|
||||
dict(name=['script', 'noscript', 'style'])]
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
|
||||
def print_version(self, url):
|
||||
split_url = url.split("/")
|
||||
print_url = 'http://www.mpg.de/print/' + split_url[3]
|
||||
return print_url
|
||||
|
||||
feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]
|
||||
|
51
recipes/mayra.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
mayra.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Mayra(BasicNewsRecipe):
|
||||
title = u'Mayra'
|
||||
language = 'ro'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'Traieste urban, cool, sexy'
|
||||
publisher = 'Mayra'
|
||||
category = 'Ziare,Stiri,Reviste'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://img.konkurs.ro/img/concursuri-cu-premii/147/14672_front.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'article_details'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'LikePluginPagelet'})
|
||||
, dict(name='p', attrs={'id':'tags'})
|
||||
, dict(name='span', attrs={'id':'tweet-button'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':'LikePluginPagelet'})
|
||||
]
|
||||
|
||||
feeds = [ (u'\u0218tiri', u'http://www.mayra.ro/rss') ]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
10
recipes/mens_health.recipe
Normal file
@ -0,0 +1,10 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1305636254(BasicNewsRecipe):
|
||||
title = u'Mens Health (US)'
|
||||
language = 'en'
|
||||
__author__ = 'Anonymous'
|
||||
oldest_article = 14
|
||||
max_articles_per_feed = 100
|
||||
|
||||
feeds = [(u'News', u'http://blogs.menshealth.com/health-headlines/feed')]
|
@ -1,15 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2011, Eddie Lau'
|
||||
|
||||
# Users of Kindle 3 (with limited system-level CJK support)
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
__MakePeriodical__ = True
|
||||
# Turn it to True if your device supports display of CJK titles
|
||||
# Turn below to true if your device supports display of CJK titles
|
||||
__UseChineseTitle__ = False
|
||||
|
||||
# Trun below to true if you wish to use life.mingpao.com as the main article source
|
||||
__UseLife__ = True
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||
2011/02/28: rearrange the sections
|
||||
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
|
||||
@ -32,41 +35,43 @@ import os, datetime, re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
class MPHKRecipe(BasicNewsRecipe):
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
publisher = 'MingPao'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'Eddie Lau'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
publisher = 'MingPao'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['photo']})
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||
]
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page
|
||||
dict(name='table')] # for content fetched from life.mingpao.com
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||
dict(name='table')] # for content fetched from life.mingpao.com
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '<h1>'),
|
||||
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||
@ -80,10 +85,10 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
lambda match: "</b>")
|
||||
]
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
# minIdx = 10000
|
||||
# i0 = url.find('0')
|
||||
# if i0 >= 0 and i0 < minIdx:
|
||||
@ -115,314 +120,357 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
# i9 = url.find('9')
|
||||
# if i9 >= 0 and i9 < minIdx:
|
||||
# minIdx = i9
|
||||
return url
|
||||
return url
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||
dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
||||
return dt_local
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||
dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
||||
return dt_local
|
||||
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchday(self):
|
||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
def get_fetchday(self):
|
||||
# dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||
# dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
cover = None
|
||||
return cover
|
||||
def get_cover_url(self):
|
||||
cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
if __UseLife__:
|
||||
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
|
||||
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
|
||||
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
|
||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
|
||||
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
|
||||
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
|
||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special- editorial
|
||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
if ed_articles:
|
||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
else:
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
# special- editorial
|
||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
if ed_articles:
|
||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
|
||||
# special - finance
|
||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
# special - finance
|
||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
# special- columns
|
||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||
if col_articles:
|
||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||
# special- columns
|
||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||
if col_articles:
|
||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||
|
||||
return feeds
|
||||
return feeds
|
||||
|
||||
def parse_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
divs.reverse()
|
||||
for i in divs:
|
||||
a = i.find('a', href = True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
# parse from news.mingpao.com
|
||||
def parse_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
divs.reverse()
|
||||
for i in divs:
|
||||
a = i.find('a', href = True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_ed_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
# parse from life.mingpao.com
|
||||
def parse_section2(self, url, keystr):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_fin_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href= True)
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
|
||||
title = self.tag_to_string(i)
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
return current_articles
|
||||
def parse_ed_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def parse_ent_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
def parse_fin_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href= True)
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
#url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
#if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||
if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
|
||||
title = self.tag_to_string(i)
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
return current_articles
|
||||
|
||||
def parse_col_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
def parse_ent_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(style=True):
|
||||
del item['width']
|
||||
for item in soup.findAll(stype=True):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
def parse_col_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = self.short_title()
|
||||
# if not generating a periodical, force date to apply in title
|
||||
if __MakePeriodical__ == False:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
if True:
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.timestamp = nowf()
|
||||
mi.timestamp = self.get_dtlocal()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.pubdate = nowf()
|
||||
mi.pubdate = self.get_dtlocal()
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(style=True):
|
||||
del item['width']
|
||||
for item in soup.findAll(stype=True):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = self.short_title()
|
||||
# if not generating a periodical, force date to apply in title
|
||||
if __MakePeriodical__ == False:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
if True:
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.timestamp = nowf()
|
||||
mi.timestamp = self.get_dtlocal()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.pubdate = nowf()
|
||||
mi.pubdate = self.get_dtlocal()
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
auth = a.author
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = a.text_summary
|
||||
if not desc:
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
auth = a.author
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = a.text_summary
|
||||
if not desc:
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth, description=desc)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
relp = sp[len(prefix):]
|
||||
entries.append(relp.replace(os.sep, '/'))
|
||||
last = sp
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
relp = sp[len(prefix):]
|
||||
entries.append(relp.replace(os.sep, '/'))
|
||||
last = sp
|
||||
|
||||
if os.path.exists(last):
|
||||
with open(last, 'rb') as fi:
|
||||
src = fi.read().decode('utf-8')
|
||||
soup = BeautifulSoup(src)
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
if os.path.exists(last):
|
||||
with open(last, 'rb') as fi:
|
||||
src = fi.read().decode('utf-8')
|
||||
soup = BeautifulSoup(src)
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, self.publisher, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
with open(last, 'wb') as fi:
|
||||
fi.write(unicode(soup).encode('utf-8'))
|
||||
if len(feeds) == 0:
|
||||
raise Exception('All feeds are empty, aborting.')
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
with open(last, 'wb') as fi:
|
||||
fi.write(unicode(soup).encode('utf-8'))
|
||||
if len(feeds) == 0:
|
||||
raise Exception('All feeds are empty, aborting.')
|
||||
|
||||
if len(feeds) > 1:
|
||||
for i, f in enumerate(feeds):
|
||||
entries.append('feed_%d/index.html'%i)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
auth = getattr(f, 'author', None)
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = getattr(f, 'description', None)
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
if len(feeds) > 1:
|
||||
for i, f in enumerate(feeds):
|
||||
entries.append('feed_%d/index.html'%i)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
auth = getattr(f, 'author', None)
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = getattr(f, 'description', None)
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
feed_index(0, toc)
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
feed_index(0, toc)
|
||||
|
||||
for i, p in enumerate(entries):
|
||||
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||
opf.create_spine(entries)
|
||||
opf.set_toc(toc)
|
||||
for i, p in enumerate(entries):
|
||||
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||
opf.create_spine(entries)
|
||||
opf.set_toc(toc)
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
50
recipes/moldovaazi.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
azi.md
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MoldovaAzi(BasicNewsRecipe):
|
||||
title = u'Moldova Azi'
|
||||
language = 'ro'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'Moldova pe internet'
|
||||
publisher = 'Moldova Azi'
|
||||
category = 'Ziare,Stiri,Moldova'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.azi.md/images/logo.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'in'})
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'in-more-stories'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':'comment_wrapper'})
|
||||
, dict(name='div', attrs={'class':'box-title4'})
|
||||
]
|
||||
|
||||
feeds = [ (u'\u0218tiri', u'http://www.azi.md/ro/feeds/0/rss201') ]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
71
recipes/natgeo.recipe
Normal file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, gagsays <gagsays at gmail dot com>'
|
||||
'''
|
||||
nationalgeographic.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class NatGeo(BasicNewsRecipe):
|
||||
title = u'National Geographic'
|
||||
description = 'Daily news articles from The National Geographic'
|
||||
language = 'en'
|
||||
oldest_article = 20
|
||||
max_articles_per_feed = 25
|
||||
encoding = 'utf8'
|
||||
publisher = 'nationalgeographic.com'
|
||||
category = 'science, nat geo'
|
||||
__author__ = 'gagsays'
|
||||
masthead_url = 'http://s.ngeo.com/wpf/sites/themes/global/i/presentation/ng_logo_small.png'
|
||||
description = 'Inspiring people to care about the planet since 1888'
|
||||
timefmt = ' [%a, %d %b, %Y]'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
|
||||
extra_css = '''
|
||||
body {color: #000000;font-size: medium;}
|
||||
h1 {color: #222222; font-size: large; font-weight:lighter; text-decoration:none; text-align: center;font-family:Georgia,Times New Roman,Times,serif;}
|
||||
h2 {color: #454545; font-size: small; font-weight:lighter; text-decoration:none; text-align: justify; font-style:italic;font-family :Georgia,Times New Roman,Times,serif;}
|
||||
h3 {color: #555555; font-size: small; font-style:italic; margin-top: 10px;}
|
||||
img{margin-bottom: 0.25em;display:block;margin-left: auto;margin-right: auto;}
|
||||
a:link,a,.a,href {text-decoration: none;color: #000000;}
|
||||
.caption{color: #000000;font-size: xx-small;text-align: justify;font-weight:normal;}
|
||||
.credit{color: #555555;font-size: xx-small;text-align: left;font-weight:lighter;}
|
||||
p.author,p.publication{color: #000000;font-size: xx-small;text-align: left;display:inline;}
|
||||
p.publication_time{color: #000000;font-size: xx-small;text-align: right;text-decoration: underline;}
|
||||
p {margin-bottom: 0;}
|
||||
p + p {text-indent: 1.5em;margin-top: 0;}
|
||||
.hidden{display:none;}
|
||||
#page_head{text-transform:uppercase;}
|
||||
'''
|
||||
|
||||
def parse_feeds (self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
for feed in feeds:
|
||||
for article in feed.articles[:]:
|
||||
if 'Presented' in article.title or 'Pictures' in article.title:
|
||||
feed.articles.remove(article)
|
||||
return feeds
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
remove_tags_before = dict(id='page_head')
|
||||
keep_only_tags = [
|
||||
dict(name='div',attrs={'id':['page_head','content_mainA']})
|
||||
]
|
||||
remove_tags_after = [
|
||||
dict(name='div',attrs={'class':['article_text','promo_collection']})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['aside','primary full_width']})
|
||||
,dict(name='div', attrs={'id':['header_search','navigation_mainB_wrap']})
|
||||
]
|
||||
feeds = [
|
||||
(u'Daily News', u'http://feeds.nationalgeographic.com/ng/News/News_Main')
|
||||
]
|
||||
|
25
recipes/national_geographic_de.recipe
Normal file
@ -0,0 +1,25 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
|
||||
title = u'National Geographic (DE)'
|
||||
__author__ = 'Anonymous'
|
||||
language = 'de'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 1000
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.nationalgeographic.de/images/national-geographic-logo.jpg'
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['contentbox_no_top_border']}) ]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'related'}),
|
||||
dict(name='li', attrs={'class':'first'}),
|
||||
dict(name='div', attrs={'class':'extrasbox_inner'}),
|
||||
|
||||
]
|
||||
|
||||
feeds = [ (u'National Geographic', u'http://feeds.nationalgeographic.de/ng-neueste-artikel'),
|
||||
|
||||
]
|
50
recipes/newsmoldova.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
newsmoldova.md
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NewsMoldova(BasicNewsRecipe):
|
||||
title = u'Agen\u0163ia de \u015ftiri Moldova'
|
||||
language = 'ro'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'Agen\u0163ia de \u015ftiri Moldova'
|
||||
publisher = 'Moldova'
|
||||
category = 'Ziare,Stiri,Moldova'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.newsmoldova.md/i/logo_top_md.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':'main-article-index article'})
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'actions'})
|
||||
, dict(name='li', attrs={'class':'invisible'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':'actions'})
|
||||
]
|
||||
|
||||
feeds = [ (u'\u0218tiri', u'http://newsmoldova.md/export/rss2/archive/index.xml') ]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe):
|
||||
|
||||
BASE_URL = 'http://www.newsweek.com'
|
||||
|
||||
topics = {
|
||||
'Culture' : '/tag/culture.html',
|
||||
'Business' : '/tag/business.html',
|
||||
'Society' : '/tag/society.html',
|
||||
'Science' : '/tag/science.html',
|
||||
'Education' : '/tag/education.html',
|
||||
'Politics' : '/tag/politics.html',
|
||||
'Health' : '/tag/health.html',
|
||||
'World' : '/tag/world.html',
|
||||
'Nation' : '/tag/nation.html',
|
||||
'Technology' : '/tag/technology.html',
|
||||
'Game Changers' : '/tag/game-changers.html',
|
||||
}
|
||||
|
||||
keep_only_tags = dict(name='article', attrs={'class':'article-text'})
|
||||
remove_tags = [dict(attrs={'data-dartad':True})]
|
||||
remove_attributes = ['property']
|
||||
@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe):
|
||||
return soup
|
||||
|
||||
def newsweek_sections(self):
|
||||
return [
|
||||
('Nation', 'http://www.newsweek.com/tag/nation.html'),
|
||||
('Society', 'http://www.newsweek.com/tag/society.html'),
|
||||
('Culture', 'http://www.newsweek.com/tag/culture.html'),
|
||||
('World', 'http://www.newsweek.com/tag/world.html'),
|
||||
('Politics', 'http://www.newsweek.com/tag/politics.html'),
|
||||
('Business', 'http://www.newsweek.com/tag/business.html'),
|
||||
]
|
||||
for topic_name, topic_url in self.topics.iteritems():
|
||||
yield (topic_name,
|
||||
self.BASE_URL+topic_url)
|
||||
|
||||
|
||||
def newsweek_parse_section_page(self, soup):
|
||||
for article in soup.findAll('article', about=True,
|
||||
|
29
recipes/ngz.recipe
Normal file
@ -0,0 +1,29 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'NGZ-online'
|
||||
__author__ = 'schuster'
|
||||
remove_tags_before = dict(id='bu')
|
||||
remove_tags_after = dict(id='noblock')
|
||||
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix', 'liketext']}),
|
||||
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index', 'Verlinken', 'vorheriger', 'LESERKOMMENTARE', 'bei facebook', 'bei twitter', 'Schreiben Sie jetzt Ihre Meinung:', 'Thema', 'Ihr Beitrag', 'Ihr Name', 'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.', 'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto', 'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung', 'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter', 'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast', 'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos', 'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong', 'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider', 'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken']),
|
||||
dict(name=['script', 'noscript', 'style'])]
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.rhein-kreis-neuss-macht-sport.de/sport/includes/bilder/ngz_logo.jpg'
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?ot=de.circit.rpo.PopupPageLayout.ot'
|
||||
feeds = [
|
||||
(u'Grevenbroich', u'http://www.ngz-online.de/app/feed/rss/grevenbroich'),
|
||||
(u'Kreis Neuss', u'http://www.ngz-online.de/app/feed/rss/rheinkreisneuss'),
|
||||
(u'Dormagen', u'http://www.ngz-online.de/app/feed/rss/dormagen'),
|
||||
(u'J\xfcchen', u'http://www.ngz-online.de/app/feed/rss/juechen'),
|
||||
(u'Rommerskirchen', u'http://www.ngz-online.de/app/feed/rss/rommerskirchen')
|
||||
|
||||
]
|
||||
|
22
recipes/pro_physik.recipe
Normal file
@ -0,0 +1,22 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'Pro Physik'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 4
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.pro-physik.de/Phy/images/site/prophysik_logo1.jpg'
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('leadArticle.do', 'print.do')
|
||||
|
||||
|
||||
feeds = [(u'Hightech', u'http://www.pro-physik.de/Phy/hightechfeed.xml'),
|
||||
(u'Forschung', u'http://www.pro-physik.de/Phy/forschungfeed.xml'),
|
||||
(u'Magazin', u'http://www.pro-physik.de/Phy/magazinfeed.xml')]
|
||||
|
@ -3,7 +3,6 @@ __license__ = 'GPL v3'
|
||||
'''
|
||||
'''
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.web.feeds import Feed
|
||||
|
||||
|
||||
class ReadersDigest(BasicNewsRecipe):
|
||||
@ -38,151 +37,20 @@ class ReadersDigest(BasicNewsRecipe):
|
||||
'''
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='h4', attrs={'class':'close'}),
|
||||
dict(name='div', attrs={'class':'fromLine'}),
|
||||
dict(name='img', attrs={'class':'colorTag'}),
|
||||
dict(name='div', attrs={'id':'sponsorArticleHeader'}),
|
||||
dict(name='div', attrs={'class':'horizontalAd'}),
|
||||
dict(name='div', attrs={'id':'imageCounterLeft'}),
|
||||
dict(name='div', attrs={'id':'commentsPrint'})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
('New in RD', 'http://feeds.rd.com/ReadersDigest'),
|
||||
('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
|
||||
('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
|
||||
('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
|
||||
('Food', 'http://www.rd.com/food/feed'),
|
||||
('Health', 'http://www.rd.com/health/feed'),
|
||||
('Home', 'http://www.rd.com/home/feed'),
|
||||
('Family', 'http://www.rd.com/family/feed'),
|
||||
('Money', 'http://www.rd.com/money/feed'),
|
||||
('Travel', 'http://www.rd.com/travel/feed'),
|
||||
]
|
||||
|
||||
cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
|
||||
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def print_version(self, url):
|
||||
|
||||
# Get the identity number of the current article and append it to the root print URL
|
||||
|
||||
if url.find('/article') > 0:
|
||||
ident = url[url.find('/article')+8:url.find('.html?')-4]
|
||||
url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
|
||||
|
||||
elif url.find('/post') > 0:
|
||||
|
||||
# in this case, have to get the page itself to derive the Print page.
|
||||
soup = self.index_to_soup(url)
|
||||
newsoup = soup.find('ul',attrs={'class':'printBlock'})
|
||||
url = 'http://www.rd.com' + newsoup('a')[0]['href']
|
||||
url = url[0:url.find('&Keep')]
|
||||
|
||||
return url
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def parse_index(self):
|
||||
|
||||
pages = [
|
||||
('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
|
||||
# useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
|
||||
('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
|
||||
|
||||
keep_only_tags = dict(id='main-content')
|
||||
remove_tags = [
|
||||
{'class':['post-categories']},
|
||||
]
|
||||
|
||||
feeds = []
|
||||
|
||||
for page in pages:
|
||||
section, url, divider, attrList = page
|
||||
newArticles = self.page_parse(url, divider, attrList)
|
||||
feeds.append((section,newArticles))
|
||||
|
||||
# after the pages of the site have been processed, parse several RSS feeds for additional sections
|
||||
newfeeds = Feed()
|
||||
newfeeds = self.parse_rss()
|
||||
|
||||
|
||||
# The utility code in parse_rss returns a Feed object. Convert each feed/article combination into a form suitable
|
||||
# for this module (parse_index).
|
||||
|
||||
for feed in newfeeds:
|
||||
newArticles = []
|
||||
for article in feed.articles:
|
||||
newArt = {
|
||||
'title' : article.title,
|
||||
'url' : article.url,
|
||||
'date' : article.date,
|
||||
'description' : article.text_summary
|
||||
}
|
||||
newArticles.append(newArt)
|
||||
|
||||
|
||||
# New and Blogs should be the first two feeds.
|
||||
if feed.title == 'New in RD':
|
||||
feeds.insert(0,(feed.title,newArticles))
|
||||
elif feed.title == 'Blogs':
|
||||
feeds.insert(1,(feed.title,newArticles))
|
||||
else:
|
||||
feeds.append((feed.title,newArticles))
|
||||
|
||||
|
||||
return feeds
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def page_parse(self, mainurl, divider, attrList):
|
||||
|
||||
articles = []
|
||||
mainsoup = self.index_to_soup(mainurl)
|
||||
for item in mainsoup.findAll(attrs=attrList):
|
||||
newArticle = {
|
||||
'title' : item('img')[0]['alt'],
|
||||
'url' : 'http://www.rd.com'+item('a')[0]['href'],
|
||||
'date' : '',
|
||||
'description' : ''
|
||||
}
|
||||
articles.append(newArticle)
|
||||
|
||||
|
||||
|
||||
return articles
|
||||
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
||||
def parse_rss (self):
|
||||
|
||||
# Do the "official" parse_feeds first
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
|
||||
|
||||
# Loop thru the articles in all feeds to find articles with "recipe" in it
|
||||
recipeArticles = []
|
||||
for curfeed in feeds:
|
||||
delList = []
|
||||
for a,curarticle in enumerate(curfeed.articles):
|
||||
if curarticle.title.upper().find('RECIPE') >= 0:
|
||||
recipeArticles.append(curarticle)
|
||||
delList.append(curarticle)
|
||||
if len(delList)>0:
|
||||
for d in delList:
|
||||
index = curfeed.articles.index(d)
|
||||
curfeed.articles[index:index+1] = []
|
||||
|
||||
# If there are any recipes found, create a new Feed object and append.
|
||||
if len(recipeArticles) > 0:
|
||||
pfeed = Feed()
|
||||
pfeed.title = 'Recipes'
|
||||
pfeed.descrition = 'Recipe Feed (Virtual)'
|
||||
pfeed.image_url = None
|
||||
pfeed.oldest_article = 30
|
||||
pfeed.id_counter = len(recipeArticles)
|
||||
# Create a new Feed, add the recipe articles, and then append
|
||||
# to "official" list of feeds
|
||||
pfeed.articles = recipeArticles[:]
|
||||
feeds.append(pfeed)
|
||||
|
||||
return feeds
|
||||
|
||||
|
53
recipes/replicavedetelor.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, '
|
||||
'''
|
||||
replicavedetelor.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ReplicaVedetelor(BasicNewsRecipe):
|
||||
title = u'Replica Vedetelor'
|
||||
__author__ = u'Silviu Cotoara'
|
||||
description = u'Ofer\u0103 vedetelor dreptul la replic\u0103'
|
||||
publisher = 'Replica Vedetelor'
|
||||
oldest_article = 5
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Reviste,Vedete'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.webart-software.eu/_pics/lucrari_referinta/medium/84/1-Replica-Vedetelor.jpg'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'zona-continut'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='ul', attrs={'id':['lista-imagini']})
|
||||
, dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
|
||||
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.replicavedetelor.ro/feed')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
|
@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RzeczpospolitaRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
__author__ = u'kwetal and Tomasz Dlugosz'
|
||||
language = 'pl'
|
||||
version = 1
|
||||
|
||||
@ -38,6 +38,8 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
|
||||
@ -48,6 +50,13 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
|
||||
.fot{font-size: x-small; color: #666666;}
|
||||
'''
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
if ('advertisement' in soup.find('title').string.lower()):
|
||||
href = soup.find('a').get('href')
|
||||
return self.index_to_soup(href, raw=True)
|
||||
else:
|
||||
return None
|
||||
|
||||
def print_version(self, url):
|
||||
start, sep, rest = url.rpartition('/')
|
||||
forget, sep, index = rest.rpartition(',')
|
||||
|
28
recipes/spektrum.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
title = u'Spektrum (der Wissenschaft)'
|
||||
__author__ = 'schuster'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = 'de'
|
||||
cover_url = 'http://upload.wikimedia.org/wikipedia/de/3/3b/Spektrum_der_Wissenschaft_Logo.svg'
|
||||
|
||||
remove_tags = [dict(attrs={'class':['hauptnaviPkt gainlayout', 'hauptnaviButton', 'suchButton', 'suchbegriffKasten', 'loginButton', 'subnavigation', 'artikelInfoLeiste gainlayout', 'artikelTools', 'nurLetzteSeite', 'link', 'boxUnterArtikel', 'leserbriefeBlock', 'boxTitel', 'boxInhalt', 'sehrklein', 'boxabstand', 'werbeboxinhalt', 'rbabstand', 'bildlinks', 'rechtebox', 'denkmalbox', 'denkmalfrage']}),
|
||||
dict(id=['pflip', 'verlagsleiste', 'bereich', 'bannerVertikal', 'headerLogoLink', 'kopf', 'topNavi', 'headerSchnellsuche', 'headerSchnellsucheWarten', 'navigation', 'navigationL', 'navigationR', 'inhalt', 'rechtespalte', 'sdwboxenshop', 'shopboxen', 'fuss']),
|
||||
dict(name=['naservice'])]
|
||||
|
||||
def print_version(self,url):
|
||||
newurl = url.replace('artikel/', 'sixcms/detail.php?id=')
|
||||
return newurl + '&_druckversion=1'
|
||||
|
||||
|
||||
|
||||
feeds = [(u'Spektrum der Wissenschaft', u'http://www.spektrum.de/artikel/982623'),
|
||||
(u'SpektrumDirekt', u'http://www.spektrumdirekt.de/artikel/996406'),
|
||||
(u'Sterne und Weltraum', u'http://www.astronomie-heute.de/artikel/865248'),
|
||||
(u'Gehirn & Geist', u'http://www.gehirn-und-geist.de/artikel/982626'),
|
||||
(u'epoc', u'http://www.epoc.de/artikel/982625')
|
||||
|
||||
]
|
||||
|
||||
filter_regexps = [r'ads\.doubleclick\.net']
|
@ -33,7 +33,7 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
|
||||
elif c.name.endswith('_password'):
|
||||
br[c.name] = self.password
|
||||
raw = br.submit().read()
|
||||
if '>Logout' not in raw:
|
||||
if 'You have been logged in' not in raw:
|
||||
raise ValueError('Failed to login, check your username and password')
|
||||
return br
|
||||
|
||||
|
24
recipes/technology_review_de.recipe
Normal file
@ -0,0 +1,24 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'Technology Review'
|
||||
__author__ = 'schuster'
|
||||
remove_tags_before = dict(id='keywords')
|
||||
remove_tags_after = dict(id='kommentar')
|
||||
remove_tags = [dict(attrs={'class':['navi_oben_pvg', 'navi_oben_tarifr', 'navi_oben_itm', 'navi_oben_eve', 'navi_oben_whi', 'navi_oben_abo', 'navi_oben_shop', 'navi_top_logo', 'navi_top_abschnitt', 'first']}),
|
||||
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
|
||||
dict(name=['script', 'noscript', 'style'])]
|
||||
oldest_article = 4
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?view=print'
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Technik News', u'http://www.heise.de/tr/news-atom.xml') ]
|
||||
|
@ -1,17 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
||||
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TelepolisNews(BasicNewsRecipe):
|
||||
title = u'Telepolis (News+Artikel)'
|
||||
__author__ = 'Gerhard Aigner'
|
||||
__author__ = 'syntaxis'
|
||||
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
|
||||
description = 'News from telepolis'
|
||||
description = 'News from Telepolis'
|
||||
category = 'news'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
@ -20,14 +15,19 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
encoding = "utf-8"
|
||||
language = 'de'
|
||||
|
||||
use_embedded_content =False
|
||||
|
||||
remove_empty_feeds = True
|
||||
|
||||
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
|
||||
|
||||
keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
|
||||
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]
|
||||
|
||||
keep_only_tags = [dict(name = 'div',attrs={'class':'head'}),dict(name = 'div',attrs={'class':'leftbox'}),dict(name='td',attrs={'class':'strict'})]
|
||||
remove_tags = [ dict(name='td',attrs={'class':'blogbottom'}),
|
||||
dict(name='div',attrs={'class':'forum'}), dict(name='div',attrs={'class':'social'}),dict(name='div',attrs={'class':'blog-letter p-news'}),
|
||||
dict(name='div',attrs={'class':'blog-sub'}),dict(name='div',attrs={'class':'version-div'}),dict(name='div',attrs={'id':'breadcrumb'})
|
||||
,dict(attrs={'class':'tp-url'}),dict(attrs={'class':'blog-name entry_'}) ]
|
||||
|
||||
remove_tags_after = [dict(name='span', attrs={'class':['breadcrumb']})]
|
||||
|
||||
|
||||
feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]
|
||||
|
||||
@ -39,15 +39,8 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
def get_article_url(self, article):
|
||||
'''if the linked article is of kind artikel don't take it'''
|
||||
if (article.link.count('artikel') > 1) :
|
||||
return None
|
||||
return article.link
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008 - 2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
thenation.com
|
||||
'''
|
||||
@ -16,10 +16,17 @@ class Thenation(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
language = 'en'
|
||||
use_embedded_content = False
|
||||
delay = 1
|
||||
masthead_url = 'http://www.thenation.com/sites/default/themes/thenation/images/logo-main.gif'
|
||||
exra_css = ' body{font-family: Arial,Helvetica,sans-serif;} .print-created{font-size: small;} .caption{display: block; font-size: x-small;} '
|
||||
use_embedded_content = False
|
||||
delay = 1
|
||||
masthead_url = 'http://www.thenation.com/sites/default/themes/thenation/images/logo-main.gif'
|
||||
login_url = 'http://www.thenation.com/user?destination=%3Cfront%3E'
|
||||
publication_type = 'magazine'
|
||||
needs_subscription = 'optional'
|
||||
exra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif;}
|
||||
.print-created{font-size: small;}
|
||||
.caption{display: block; font-size: x-small;}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -28,13 +35,30 @@ class Thenation(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [ dict(attrs={'class':['print-title','print-created','print-content','print-links']}) ]
|
||||
remove_tags = [dict(name='link')]
|
||||
keep_only_tags = [dict(attrs={'class':['print-title','print-created','print-content','print-links']})]
|
||||
remove_tags = [dict(name=['link','iframe','base','meta','object','embed'])]
|
||||
remove_attributes = ['lang']
|
||||
|
||||
feeds = [(u"Editor's Picks", u'http://www.thenation.com/rss/editors_picks')]
|
||||
feeds = [(u"Articles", u'http://www.thenation.com/rss/articles')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('.thenation.com/','.thenation.com/print/')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br.open('http://www.thenation.com/')
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.login_url)
|
||||
br.select_form(nr=1)
|
||||
br['name'] = self.username
|
||||
br['pass'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.thenation.com/issue/')
|
||||
item = soup.find('div',attrs={'id':'cover-wrapper'})
|
||||
if item:
|
||||
return item.img['src']
|
||||
return None
|
||||
|
86
recipes/united_daily.recipe
Normal file
@ -0,0 +1,86 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class UnitedDaily(BasicNewsRecipe):
|
||||
title = u'聯合新聞網'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
|
||||
feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
|
||||
(u'政治', u'http://udn.com/udnrss/politics.xml'),
|
||||
(u'社會', u'http://udn.com/udnrss/social.xml'),
|
||||
(u'生活', u'http://udn.com/udnrss/life.xml'),
|
||||
(u'綜合', u'http://udn.com/udnrss/education.xml'),
|
||||
(u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
|
||||
(u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
|
||||
(u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
|
||||
(u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
|
||||
(u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
|
||||
(u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
|
||||
(u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
|
||||
(u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
|
||||
(u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
|
||||
(u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
|
||||
(u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
|
||||
(u'國際焦點', u'http://udn.com/udnrss/international.xml'),
|
||||
(u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
|
||||
(u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
|
||||
(u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
|
||||
(u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
|
||||
(u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
|
||||
(u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
|
||||
(u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
|
||||
(u'房市情報', u'http://udn.com/udnrss/houses.xml'),
|
||||
(u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
|
||||
(u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
|
||||
(u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
|
||||
(u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
|
||||
(u'棒球', u'http://udn.com/udnrss/baseball.xml'),
|
||||
(u'籃球', u'http://udn.com/udnrss/basketball.xml'),
|
||||
(u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
|
||||
(u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
|
||||
(u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
|
||||
(u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
|
||||
(u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
|
||||
(u'電影世界', u'http://udn.com/udnrss/movie.xml'),
|
||||
(u'流行音樂', u'http://udn.com/udnrss/music.xml'),
|
||||
(u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
|
||||
(u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
|
||||
(u'食樂指南', u'http://udn.com/udnrss/food.xml'),
|
||||
(u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
|
||||
(u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
|
||||
(u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
|
||||
(u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
|
||||
(u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
|
||||
(u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
|
||||
(u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
|
||||
(u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
|
||||
(u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
|
||||
]
|
||||
|
||||
extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}'''
|
||||
|
||||
__author__ = 'Eddie Lau'
|
||||
__version__ = '1.1'
|
||||
language = 'zh-TW'
|
||||
publisher = 'United Daily News Group'
|
||||
description = 'United Daily (Taiwan)'
|
||||
category = 'News, Chinese, Taiwan'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
encoding = 'big5'
|
||||
conversion_options = {'linearize_tables':True}
|
||||
masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
||||
cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
||||
keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
|
||||
dict(name='div', attrs={'id':['story_title']}),
|
||||
dict(name='td', attrs={'class':['story_author']}),
|
||||
dict(name='div', attrs={'id':['story_author']}),
|
||||
dict(name='td', attrs={'class':['story']}),
|
||||
dict(name='div', attrs={'id':['story']}),
|
||||
]
|
||||
remove_tags = [dict(name='div', attrs={'id':['mvouter']})]
|
||||
|
@ -1,64 +1,75 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.washingtonpost.com
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheWashingtonPost(BasicNewsRecipe):
|
||||
title = 'The Washington Post'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Leading source for news, video and opinion on politics, business, world and national news, science, travel, entertainment and more. Our local coverage includes reporting on education, crime, weather, traffic, real estate, jobs and cars for DC, Maryland and Virginia. Offering award-winning opinion writing, entertainment information and restaurant reviews.'
|
||||
publisher = 'The Washington Post Company'
|
||||
category = 'news, politics, USA'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
delay = 1
|
||||
use_embedded_content = False
|
||||
language = 'en'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.washingtonpost.com/rw/sites/twpweb/img/logos/twp_logo_300.gif'
|
||||
cover_url = strftime('http://www.washingtonpost.com/rw/WashingtonPost/Content/Epaper/%Y-%m-%d/Ax1.pdf')
|
||||
extra_css = """
|
||||
body{font-family: Georgia,serif }
|
||||
"""
|
||||
|
||||
class WashingtonPost(BasicNewsRecipe):
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
title = 'Washington Post'
|
||||
description = 'US political news'
|
||||
__author__ = 'Kovid Goyal'
|
||||
use_embedded_content = False
|
||||
max_articles_per_feed = 20
|
||||
language = 'en'
|
||||
encoding = 'utf-8'
|
||||
keep_only_tags = [dict(attrs={'id':['content','entryhead','entrytext']})]
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','iframe','base'])
|
||||
,dict(attrs={'id':'multimedia-leaf-page'})
|
||||
]
|
||||
remove_attributes= ['lang','property','epochtime','datetitle','pagetype','contenttype','comparetime']
|
||||
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
feeds = [
|
||||
('Politics', 'http://www.washingtonpost.com/rss/politics'),
|
||||
('Nation', 'http://www.washingtonpost.com/rss/national'),
|
||||
('World', 'http://www.washingtonpost.com/rss/world'),
|
||||
('Business', 'http://www.washingtonpost.com/rss/business'),
|
||||
('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
|
||||
('Sports', 'http://www.washingtonpost.com/rss/sports'),
|
||||
('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
|
||||
('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
|
||||
('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
|
||||
('Local', 'http://www.washingtonpost.com/rss/local'),
|
||||
('Investigations',
|
||||
'http://www.washingtonpost.com/rss/investigations'),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
{'class':lambda x: x and 'article-toolbar' in x},
|
||||
{'class':lambda x: x and 'quick-comments' in x},
|
||||
{'class':lambda x: x and 'tweet' in x},
|
||||
{'class':lambda x: x and 'article-related' in x},
|
||||
{'class':lambda x: x and 'hidden' in x.split()},
|
||||
{'class':lambda x: x and 'also-read' in x.split()},
|
||||
{'class':lambda x: x and 'partners-content' in x.split()},
|
||||
{'class':['module share', 'module ads', 'comment-vars', 'hidden',
|
||||
'share-icons-wrap', 'comments', 'flipper']},
|
||||
{'id':['right-rail', 'save-and-share']},
|
||||
{'width':'1', 'height':'1'},
|
||||
|
||||
]
|
||||
|
||||
keep_only_tags = dict(id=['content', 'article'])
|
||||
|
||||
def get_article_url(self, *args):
|
||||
ans = BasicNewsRecipe.get_article_url(self, *args)
|
||||
ans = ans.rpartition('?')[0]
|
||||
if ans.endswith('_video.html'):
|
||||
return None
|
||||
if 'ads.pheedo.com' in ans:
|
||||
return None
|
||||
#if not ans.endswith('_blog.html'):
|
||||
# return None
|
||||
return ans
|
||||
|
||||
(u'World' , u'http://feeds.washingtonpost.com/rss/world' )
|
||||
,(u'National' , u'http://feeds.washingtonpost.com/rss/national' )
|
||||
,(u'White House' , u'http://feeds.washingtonpost.com/rss/politics/whitehouse' )
|
||||
,(u'Business' , u'http://feeds.washingtonpost.com/rss/business' )
|
||||
,(u'Opinions' , u'http://feeds.washingtonpost.com/rss/opinions' )
|
||||
,(u'Investigations' , u'http://feeds.washingtonpost.com/rss/investigations' )
|
||||
,(u'Local' , u'http://feeds.washingtonpost.com/rss/local' )
|
||||
,(u'Entertainment' , u'http://feeds.washingtonpost.com/rss/entertainment' )
|
||||
,(u'Sports' , u'http://feeds.washingtonpost.com/rss/sports' )
|
||||
,(u'Redskins' , u'http://feeds.washingtonpost.com/rss/sports/redskins' )
|
||||
,(u'Special Reports', u'http://feeds.washingtonpost.com/rss/national/special-reports')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('_story.html', '_singlePage.html')
|
||||
if '_story.html' in url:
|
||||
return url.replace('_story.html','_print.html')
|
||||
return url
|
||||
|
||||
def get_article_url(self, article):
|
||||
link = BasicNewsRecipe.get_article_url(self,article)
|
||||
if not 'washingtonpost.com' in link:
|
||||
self.log('Skipping adds:', link)
|
||||
return None
|
||||
for it in ['_video.html','_gallery.html','_links.html']:
|
||||
if it in link:
|
||||
self.log('Skipping non-article:', link)
|
||||
return None
|
||||
return link
|
||||
|
||||
|
20
recipes/welt_der_physik.recipe
Normal file
@ -0,0 +1,20 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
|
||||
|
||||
title = u'Welt der Physik'
|
||||
__author__ = 'schuster'
|
||||
remove_tags_befor = [dict(name='div', attrs={'class':'inhalt_bild_text_printonly'})]
|
||||
remove_tags_after = [dict(name='span', attrs={'class':'clearinhalt_bild'})]
|
||||
remove_tags = [dict(attrs={'class':['invisible', 'searchfld', 'searchbtn', 'topnavi', 'topsearch']}),
|
||||
dict(id=['naservice', 'phservicemenu', '',]),
|
||||
dict(name=['naservice'])]
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = 'de'
|
||||
remove_javascript = True
|
||||
|
||||
|
||||
feeds = [(u'Nachrichten und Neuigkeiten', u'http://www.weltderphysik.de/rss/alles.xml')]
|
53
recipes/ziuaveche.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
ziuaveche.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ZiuaVeche(BasicNewsRecipe):
|
||||
title = u'Ziua Veche'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = 'Cotidian online'
|
||||
publisher = 'Ziua Veche'
|
||||
oldest_article = 5
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Cotidiane,Stiri'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.ziuaveche.ro/wp-content/themes/tema/images/zv-logo-alb-old.png'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'singlePost'})
|
||||
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'LikePluginPagelet'})
|
||||
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':'LikePluginPagelet'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.ziuaveche.ro/feed/rss')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
@ -41,14 +41,20 @@ authors_completer_append_separator = False
|
||||
#: Author sort name algorithm
|
||||
# The algorithm used to copy author to author_sort
|
||||
# Possible values are:
|
||||
# invert: use "fn ln" -> "ln, fn" (the default algorithm)
|
||||
# invert: use "fn ln" -> "ln, fn"
|
||||
# copy : copy author to author_sort without modification
|
||||
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
|
||||
# nocomma : "fn ln" -> "ln fn" (without the comma)
|
||||
# When this tweak is changed, the author_sort values stored with each author
|
||||
# must be recomputed by right-clicking on an author in the left-hand tags pane,
|
||||
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
|
||||
# The author name suffixes are words that are ignored when they occur at the
|
||||
# end of an author name. The case of the suffix is ignored and trailing
|
||||
# periods are automatically handled.
|
||||
author_sort_copy_method = 'comma'
|
||||
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
|
||||
'MD', 'M.D', 'I', 'II', 'III', 'IV',
|
||||
'Junior', 'Senior')
|
||||
|
||||
#: Use author sort in Tag Browser
|
||||
# Set which author field to display in the tags pane (the list of authors,
|
||||
@ -345,3 +351,11 @@ send_news_to_device_location = "main"
|
||||
# work on all operating systems)
|
||||
server_listen_on = '0.0.0.0'
|
||||
|
||||
#: Unified toolbar on OS X
|
||||
# If you enable this option and restart calibre, the toolbar will be 'unified'
|
||||
# with the titlebar as is normal for OS X applications. However, doing this has
|
||||
# various bugs, for instance the minimum width of the toolbar becomes twice
|
||||
# what it should be and it causes other random bugs on some systems, so turn it
|
||||
# on at your own risk!
|
||||
unified_title_toolbar_on_osx = False
|
||||
|
||||
|
@ -3,10 +3,12 @@
|
||||
"divide": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x / y)\n",
|
||||
"uppercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.upper()\n",
|
||||
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
|
||||
"in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, sep, pat, fv, nfv):\n l = [v.strip() for v in val.split(sep) if v.strip()]\n for v in l:\n if re.search(pat, v):\n return fv\n return nfv\n",
|
||||
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
|
||||
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
|
||||
"booksize": "def evaluate(self, formatter, kwargs, mi, locals):\n if mi.book_size is not None:\n try:\n return str(mi.book_size)\n except:\n pass\n return ''\n",
|
||||
"select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n if not val:\n return ''\n vals = [v.strip() for v in val.split(',')]\n for v in vals:\n if v.startswith(key+':'):\n return v[len(key)+1:]\n return ''\n",
|
||||
"first_non_empty": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return args[i]\n i += 1\n return ''\n",
|
||||
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
|
||||
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
|
||||
"list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",
|
||||
|
@ -12,7 +12,9 @@ is64bit = platform.architecture()[0] == '64bit'
|
||||
iswindows = re.search('win(32|64)', sys.platform)
|
||||
isosx = 'darwin' in sys.platform
|
||||
isfreebsd = 'freebsd' in sys.platform
|
||||
islinux = not isosx and not iswindows and not isfreebsd
|
||||
isnetbsd = 'netbsd' in sys.platform
|
||||
isbsd = isnetbsd or isfreebsd
|
||||
islinux = not isosx and not iswindows and not isbsd
|
||||
SRC = os.path.abspath('src')
|
||||
sys.path.insert(0, SRC)
|
||||
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')
|
||||
|
@ -11,7 +11,7 @@ __all__ = [
|
||||
'build', 'build_pdf2xml', 'server',
|
||||
'gui',
|
||||
'develop', 'install',
|
||||
'resources',
|
||||
'kakasi', 'resources',
|
||||
'check',
|
||||
'sdist',
|
||||
'manual', 'tag_release',
|
||||
@ -49,8 +49,9 @@ gui = GUI()
|
||||
from setup.check import Check
|
||||
check = Check()
|
||||
|
||||
from setup.resources import Resources
|
||||
from setup.resources import Resources, Kakasi
|
||||
resources = Resources()
|
||||
kakasi = Kakasi()
|
||||
|
||||
from setup.publish import Manual, TagRelease, Stage1, Stage2, \
|
||||
Stage3, Stage4, Publish
|
||||
|
@ -11,7 +11,7 @@ from distutils import sysconfig
|
||||
|
||||
from PyQt4.pyqtconfig import QtGuiModuleMakefile
|
||||
|
||||
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
|
||||
from setup import Command, islinux, isfreebsd, isbsd, isosx, SRC, iswindows
|
||||
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
|
||||
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
|
||||
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
|
||||
@ -21,7 +21,7 @@ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
|
||||
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
|
||||
icu_lib_dirs
|
||||
MT
|
||||
isunix = islinux or isosx or isfreebsd
|
||||
isunix = islinux or isosx or isbsd
|
||||
|
||||
make = 'make' if isunix else NMAKE
|
||||
|
||||
@ -205,7 +205,7 @@ if islinux:
|
||||
ldflags.append('-lpython'+sysconfig.get_python_version())
|
||||
|
||||
|
||||
if isfreebsd:
|
||||
if isbsd:
|
||||
cflags.append('-pthread')
|
||||
ldflags.append('-shared')
|
||||
cflags.append('-I'+sysconfig.get_python_inc())
|
||||
|
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex
|
||||
|
||||
from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
|
||||
from setup import Command, islinux, isfreebsd, isbsd, basenames, modules, functions, \
|
||||
__appname__, __version__
|
||||
|
||||
HEADER = '''\
|
||||
@ -116,7 +116,7 @@ class Develop(Command):
|
||||
|
||||
|
||||
def pre_sub_commands(self, opts):
|
||||
if not (islinux or isfreebsd):
|
||||
if not (islinux or isbsd):
|
||||
self.info('\nSetting up a source based development environment is only '
|
||||
'supported on linux. On other platforms, see the User Manual'
|
||||
' for help with setting up a development environment.')
|
||||
@ -156,7 +156,7 @@ class Develop(Command):
|
||||
self.warn('Failed to compile mount helper. Auto mounting of',
|
||||
' devices will not work')
|
||||
|
||||
if not isfreebsd and os.geteuid() != 0:
|
||||
if not isbsd and os.geteuid() != 0:
|
||||
return self.warn('Must be run as root to compile mount helper. Auto '
|
||||
'mounting of devices will not work.')
|
||||
src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
|
||||
@ -168,7 +168,7 @@ class Develop(Command):
|
||||
ret = p.wait()
|
||||
if ret != 0:
|
||||
return warn()
|
||||
if not isfreebsd:
|
||||
if not isbsd:
|
||||
os.chown(dest, 0, 0)
|
||||
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
|
||||
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)
|
||||
|
@ -30,11 +30,12 @@ int report_libc_error(const char *msg) {
|
||||
}
|
||||
|
||||
int pyobject_to_int(PyObject *res) {
|
||||
int ret; PyObject *tmp;
|
||||
tmp = PyNumber_Int(res);
|
||||
if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
|
||||
else ret = (int)PyInt_AS_LONG(tmp);
|
||||
|
||||
int ret = 0; PyObject *tmp;
|
||||
if (res != NULL) {
|
||||
tmp = PyNumber_Int(res);
|
||||
if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
|
||||
else ret = (int)PyInt_AS_LONG(tmp);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
|
||||
from setup.installer.windows.wix import WixMixIn
|
||||
|
||||
OPENSSL_DIR = r'Q:\openssl'
|
||||
QT_DIR = 'Q:\\Qt\\4.7.2'
|
||||
QT_DIR = 'Q:\\Qt\\4.7.3'
|
||||
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
|
||||
LIBUSB_DIR = 'C:\\libusb'
|
||||
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
||||
|
@ -11,9 +11,6 @@
|
||||
SummaryCodepage='1252' />
|
||||
|
||||
<Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
|
||||
<!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
|
||||
after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
|
||||
<Property Id='REINSTALLMODE' Value='emus'/>
|
||||
|
||||
<Upgrade Id="{upgrade_code}">
|
||||
<UpgradeVersion Maximum="{version}"
|
||||
@ -29,6 +26,11 @@
|
||||
</Upgrade>
|
||||
<CustomAction Id="PreventDowngrading" Error="Newer version already installed."/>
|
||||
|
||||
<Property Id="APPLICATIONFOLDER">
|
||||
<RegistrySearch Id='calibreInstDir' Type='raw'
|
||||
Root='HKLM' Key="Software\{app}\Installer" Name="InstallPath" />
|
||||
</Property>
|
||||
|
||||
<Directory Id='TARGETDIR' Name='SourceDir'>
|
||||
<Merge Id="VCRedist" SourceFile="{crt_msm}" DiskId="1" Language="0"/>
|
||||
<Directory Id='ProgramFilesFolder' Name='PFiles'>
|
||||
@ -46,6 +48,9 @@
|
||||
<Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' />
|
||||
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
|
||||
</Component>
|
||||
<Component Id="RememberInstallDir" Guid="*">
|
||||
<RegistryValue Root="HKLM" Key="Software\{app}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
|
||||
</Component>
|
||||
</DirectoryRef>
|
||||
|
||||
<DirectoryRef Id="ApplicationProgramsFolder">
|
||||
@ -90,7 +95,8 @@
|
||||
ConfigurableDirectory="APPLICATIONFOLDER">
|
||||
|
||||
<Feature Id="MainApplication" Title="Program Files" Level="1"
|
||||
Description="All the files need to run {app}" Absent="disallow">
|
||||
Description="All the files needed to run {app}" Absent="disallow">
|
||||
<ComponentRef Id="RememberInstallDir"/>
|
||||
</Feature>
|
||||
|
||||
<Feature Id="VCRedist" Title="Visual C++ 8.0 Runtime" AllowAdvertise="no" Display="hidden" Level="1">
|
||||
@ -118,7 +124,7 @@
|
||||
<Property Id="ARPPRODUCTICON" Value="main_icon" />
|
||||
|
||||
<Condition
|
||||
Message="This application is only supported on Windows XP SP2, or higher.">
|
||||
Message="This application is only supported on Windows XP SP3, or higher.">
|
||||
<![CDATA[Installed OR (VersionNT >= 501)]]>
|
||||
</Condition>
|
||||
<InstallExecuteSequence>
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
|
||||
import os, cPickle, re, shutil, marshal, zipfile, glob
|
||||
from zlib import compress
|
||||
|
||||
from setup import Command, basenames, __appname__
|
||||
@ -23,13 +23,114 @@ def get_opts_from_parser(parser):
|
||||
for o in g.option_list:
|
||||
for x in do_opt(o): yield x
|
||||
|
||||
class Resources(Command):
|
||||
class Kakasi(Command):
|
||||
|
||||
description = 'Compile various needed calibre resources'
|
||||
description = 'Compile resources for unihandecode'
|
||||
|
||||
KAKASI_PATH = os.path.join(Command.SRC, __appname__,
|
||||
'ebooks', 'unihandecode', 'pykakasi')
|
||||
|
||||
def run(self, opts):
|
||||
self.records = {}
|
||||
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','kanwadict2.pickle')
|
||||
base = os.path.dirname(dest)
|
||||
if not os.path.exists(base):
|
||||
os.makedirs(base)
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating Kanwadict')
|
||||
|
||||
for line in open(src, "r"):
|
||||
self.parsekdict(line)
|
||||
self.kanwaout(dest)
|
||||
|
||||
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','itaijidict2.pickle')
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating Itaijidict')
|
||||
self.mkitaiji(src, dest)
|
||||
|
||||
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','kanadict2.pickle')
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating kanadict')
|
||||
self.mkkanadict(src, dest)
|
||||
|
||||
return
|
||||
|
||||
|
||||
def mkitaiji(self, src, dst):
|
||||
dic = {}
|
||||
for line in open(src, "r"):
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
continue
|
||||
if re.match(r"^$",line):
|
||||
continue
|
||||
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
|
||||
dic[pair[0]] = pair[1]
|
||||
cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle
|
||||
|
||||
def mkkanadict(self, src, dst):
|
||||
dic = {}
|
||||
for line in open(src, "r"):
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
continue
|
||||
if re.match(r"^$",line):
|
||||
continue
|
||||
(alpha, kana) = line.split(' ')
|
||||
dic[kana] = alpha
|
||||
cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle
|
||||
|
||||
def parsekdict(self, line):
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
return
|
||||
(yomi, kanji) = line.split(' ')
|
||||
if ord(yomi[-1:]) <= ord('z'):
|
||||
tail = yomi[-1:]
|
||||
yomi = yomi[:-1]
|
||||
else:
|
||||
tail = ''
|
||||
self.updaterec(kanji, yomi, tail)
|
||||
|
||||
def updaterec(self, kanji, yomi, tail):
|
||||
key = "%04x"%ord(kanji[0])
|
||||
if key in self.records:
|
||||
if kanji in self.records[key]:
|
||||
rec = self.records[key][kanji]
|
||||
rec.append((yomi,tail))
|
||||
self.records[key].update( {kanji: rec} )
|
||||
else:
|
||||
self.records[key][kanji]=[(yomi, tail)]
|
||||
else:
|
||||
self.records[key] = {}
|
||||
self.records[key][kanji]=[(yomi, tail)]
|
||||
|
||||
def kanwaout(self, out):
|
||||
with open(out, 'wb') as f:
|
||||
dic = {}
|
||||
for k, v in self.records.iteritems():
|
||||
dic[k] = compress(marshal.dumps(v))
|
||||
cPickle.dump(dic, f, -1)
|
||||
|
||||
def clean(self):
|
||||
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
|
||||
if os.path.exists(kakasi):
|
||||
shutil.rmtree(kakasi)
|
||||
|
||||
class Resources(Command):
|
||||
|
||||
description = 'Compile various needed calibre resources'
|
||||
sub_commands = ['kakasi']
|
||||
|
||||
def run(self, opts):
|
||||
scripts = {}
|
||||
for x in ('console', 'gui'):
|
||||
@ -117,108 +218,13 @@ class Resources(Command):
|
||||
import json
|
||||
json.dump(function_dict, open(dest, 'wb'), indent=4)
|
||||
|
||||
self.run_kakasi(opts)
|
||||
|
||||
def run_kakasi(self, opts):
|
||||
self.records = {}
|
||||
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','kanwadict2.db')
|
||||
base = os.path.dirname(dest)
|
||||
if not os.path.exists(base):
|
||||
os.makedirs(base)
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating Kanwadict')
|
||||
|
||||
for line in open(src, "r"):
|
||||
self.parsekdict(line)
|
||||
self.kanwaout(dest)
|
||||
|
||||
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','itaijidict2.pickle')
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating Itaijidict')
|
||||
self.mkitaiji(src, dest)
|
||||
|
||||
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
|
||||
dest = self.j(self.RESOURCES, 'localization',
|
||||
'pykakasi','kanadict2.pickle')
|
||||
|
||||
if self.newer(dest, src):
|
||||
self.info('\tGenerating kanadict')
|
||||
self.mkkanadict(src, dest)
|
||||
|
||||
return
|
||||
|
||||
|
||||
def mkitaiji(self, src, dst):
|
||||
dic = {}
|
||||
for line in open(src, "r"):
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
continue
|
||||
if re.match(r"^$",line):
|
||||
continue
|
||||
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
|
||||
dic[pair[0]] = pair[1]
|
||||
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
|
||||
|
||||
def mkkanadict(self, src, dst):
|
||||
dic = {}
|
||||
for line in open(src, "r"):
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
continue
|
||||
if re.match(r"^$",line):
|
||||
continue
|
||||
(alpha, kana) = line.split(' ')
|
||||
dic[kana] = alpha
|
||||
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
|
||||
|
||||
def parsekdict(self, line):
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.startswith(';;'): # skip comment
|
||||
return
|
||||
(yomi, kanji) = line.split(' ')
|
||||
if ord(yomi[-1:]) <= ord('z'):
|
||||
tail = yomi[-1:]
|
||||
yomi = yomi[:-1]
|
||||
else:
|
||||
tail = ''
|
||||
self.updaterec(kanji, yomi, tail)
|
||||
|
||||
def updaterec(self, kanji, yomi, tail):
|
||||
key = "%04x"%ord(kanji[0])
|
||||
if key in self.records:
|
||||
if kanji in self.records[key]:
|
||||
rec = self.records[key][kanji]
|
||||
rec.append((yomi,tail))
|
||||
self.records[key].update( {kanji: rec} )
|
||||
else:
|
||||
self.records[key][kanji]=[(yomi, tail)]
|
||||
else:
|
||||
self.records[key] = {}
|
||||
self.records[key][kanji]=[(yomi, tail)]
|
||||
|
||||
def kanwaout(self, out):
|
||||
dic = anydbm.open(out, 'c')
|
||||
for (k, v) in self.records.iteritems():
|
||||
dic[k] = compress(marshal.dumps(v))
|
||||
dic.close()
|
||||
|
||||
|
||||
def clean(self):
|
||||
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
|
||||
x = self.j(self.RESOURCES, x+'.pickle')
|
||||
if os.path.exists(x):
|
||||
os.remove(x)
|
||||
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
|
||||
if os.path.exists(kakasi):
|
||||
shutil.rmtree(kakasi)
|
||||
|
||||
from setup.commands import kakasi
|
||||
kakasi.clean()
|
||||
|
||||
|
||||
|
||||
|
@ -12,10 +12,10 @@ from functools import partial
|
||||
warnings.simplefilter('ignore', DeprecationWarning)
|
||||
|
||||
|
||||
from calibre.constants import (iswindows, isosx, islinux, isfreebsd, isfrozen,
|
||||
preferred_encoding, __appname__, __version__, __author__,
|
||||
win32event, win32api, winerror, fcntl,
|
||||
filesystem_encoding, plugins, config_dir)
|
||||
from calibre.constants import (iswindows, isosx, islinux, isfrozen,
|
||||
isbsd, preferred_encoding, __appname__, __version__, __author__,
|
||||
win32event, win32api, winerror, fcntl,
|
||||
filesystem_encoding, plugins, config_dir)
|
||||
from calibre.startup import winutil, winutilerror
|
||||
|
||||
if False and islinux and not getattr(sys, 'frozen', False):
|
||||
@ -31,7 +31,7 @@ if False:
|
||||
# Prevent pyflakes from complaining
|
||||
winutil, winutilerror, __appname__, islinux, __version__
|
||||
fcntl, win32event, isfrozen, __author__
|
||||
winerror, win32api, isfreebsd
|
||||
winerror, win32api, isbsd
|
||||
|
||||
_mt_inited = False
|
||||
def _init_mimetypes():
|
||||
@ -630,6 +630,24 @@ def human_readable(size):
|
||||
size = size[:-2]
|
||||
return size + " " + suffix
|
||||
|
||||
def remove_bracketed_text(src,
|
||||
brackets={u'(':u')', u'[':u']', u'{':u'}'}):
|
||||
from collections import Counter
|
||||
counts = Counter()
|
||||
buf = []
|
||||
src = force_unicode(src)
|
||||
rmap = dict([(v, k) for k, v in brackets.iteritems()])
|
||||
for char in src:
|
||||
if char in brackets:
|
||||
counts[char] += 1
|
||||
elif char in rmap:
|
||||
idx = rmap[char]
|
||||
if counts[idx] > 0:
|
||||
counts[idx] -= 1
|
||||
elif sum(counts.itervalues()) < 1:
|
||||
buf.append(char)
|
||||
return u''.join(buf)
|
||||
|
||||
if isosx:
|
||||
import glob, shutil
|
||||
fdir = os.path.expanduser('~/.fonts')
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (0, 8, 0)
|
||||
numeric_version = (0, 8, 2)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
@ -27,7 +27,9 @@ iswindows = 'win32' in _plat or 'win64' in _plat
|
||||
isosx = 'darwin' in _plat
|
||||
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
|
||||
isfreebsd = 'freebsd' in _plat
|
||||
islinux = not(iswindows or isosx or isfreebsd)
|
||||
isnetbsd = 'netbsd' in _plat
|
||||
isbsd = isfreebsd or isnetbsd
|
||||
islinux = not(iswindows or isosx or isbsd)
|
||||
isfrozen = hasattr(sys, 'frozen')
|
||||
isunix = isosx or islinux
|
||||
|
||||
|
@ -607,9 +607,22 @@ class StoreBase(Plugin): # {{{
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
author = 'John Schember'
|
||||
type = _('Store')
|
||||
# Information about the store. Should be in the primary language
|
||||
# of the store. This should not be translatable when set by
|
||||
# a subclass.
|
||||
description = _('An ebook store.')
|
||||
minimum_calibre_version = (0, 8, 0)
|
||||
version = (1, 0, 1)
|
||||
|
||||
actual_plugin = None
|
||||
|
||||
# Does the store only distribute ebooks without DRM.
|
||||
drm_free_only = False
|
||||
# This is the 2 letter country code for the corporate
|
||||
# headquarters of the store.
|
||||
headquarters = ''
|
||||
# All formats the store distributes ebooks in.
|
||||
formats = []
|
||||
|
||||
def load_actual_plugin(self, gui):
|
||||
'''
|
||||
|
@ -1,4 +1,5 @@
|
||||
import os.path
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
@ -629,6 +630,7 @@ from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
|
||||
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
||||
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
||||
from calibre.ebooks.metadata.sources.douban import Douban
|
||||
|
||||
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
|
||||
|
||||
# }}}
|
||||
@ -853,6 +855,17 @@ class ActionStore(InterfaceActionBase):
|
||||
author = 'John Schember'
|
||||
actual_plugin = 'calibre.gui2.actions.store:StoreAction'
|
||||
|
||||
def customization_help(self, gui=False):
|
||||
return 'Customize the behavior of the store search.'
|
||||
|
||||
def config_widget(self):
|
||||
from calibre.gui2.store.config.store import config_widget as get_cw
|
||||
return get_cw()
|
||||
|
||||
def save_settings(self, config_widget):
|
||||
from calibre.gui2.store.config.store import save_settings as save
|
||||
save(config_widget)
|
||||
|
||||
plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
|
||||
ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
|
||||
ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails,
|
||||
@ -1093,100 +1106,315 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
|
||||
# Store plugins {{{
|
||||
class StoreAmazonKindleStore(StoreBase):
|
||||
name = 'Amazon Kindle'
|
||||
description = _('Kindle books from Amazon')
|
||||
description = u'Kindle books from Amazon.'
|
||||
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'US'
|
||||
formats = ['KINDLE']
|
||||
|
||||
class StoreAmazonDEKindleStore(StoreBase):
|
||||
name = 'Amazon DE Kindle'
|
||||
author = 'Charles Haley'
|
||||
description = u'Kindle Bücher von Amazon.'
|
||||
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'DE'
|
||||
formats = ['KINDLE']
|
||||
|
||||
class StoreAmazonUKKindleStore(StoreBase):
|
||||
name = 'Amazon UK Kindle'
|
||||
description = _('Kindle books from Amazon.uk')
|
||||
author = 'Charles Haley'
|
||||
description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.'
|
||||
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'UK'
|
||||
formats = ['KINDLE']
|
||||
|
||||
class StoreArchiveOrgStore(StoreBase):
|
||||
name = 'Archive.org'
|
||||
description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.'
|
||||
actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
|
||||
|
||||
class StoreBaenWebScriptionStore(StoreBase):
|
||||
name = 'Baen WebScription'
|
||||
description = _('Ebooks for readers.')
|
||||
description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
|
||||
actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'LIT', 'LRF', 'MOBI', 'RB', 'RTF', 'ZIP']
|
||||
|
||||
class StoreBNStore(StoreBase):
|
||||
name = 'Barnes and Noble'
|
||||
description = _('Books, Textbooks, eBooks, Toys, Games and More.')
|
||||
description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.'
|
||||
actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'US'
|
||||
formats = ['NOOK']
|
||||
|
||||
class StoreBeamEBooksDEStore(StoreBase):
|
||||
name = 'Beam EBooks DE'
|
||||
author = 'Charles Haley'
|
||||
description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks'
|
||||
actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'DE'
|
||||
formats = ['EPUB', 'MOBI', 'PDF']
|
||||
|
||||
class StoreBeWriteStore(StoreBase):
|
||||
name = 'BeWrite Books'
|
||||
description = _('Publishers of fine books.')
|
||||
description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.'
|
||||
actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'MOBI', 'PDF']
|
||||
|
||||
class StoreDieselEbooksStore(StoreBase):
|
||||
name = 'Diesel eBooks'
|
||||
description = _('World Famous eBook Store.')
|
||||
description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
|
||||
actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'PDF']
|
||||
|
||||
class StoreEbookscomStore(StoreBase):
|
||||
name = 'eBooks.com'
|
||||
description = _('The digital bookstore.')
|
||||
description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
|
||||
actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'
|
||||
|
||||
class StoreEHarlequinStoretore(StoreBase):
|
||||
drm_free_only = False
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
|
||||
|
||||
class StoreEPubBuyDEStore(StoreBase):
|
||||
name = 'EPUBBuy DE'
|
||||
author = 'Charles Haley'
|
||||
description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
|
||||
actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'DE'
|
||||
formats = ['EPUB']
|
||||
|
||||
class StoreEHarlequinStore(StoreBase):
|
||||
name = 'eHarlequin'
|
||||
description = _('entertain, enrich, inspire.')
|
||||
description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.'
|
||||
actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'CA'
|
||||
formats = ['EPUB', 'PDF']
|
||||
|
||||
class StoreFeedbooksStore(StoreBase):
|
||||
name = 'Feedbooks'
|
||||
description = _('Read anywhere.')
|
||||
description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
|
||||
actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'
|
||||
|
||||
class StoreGutenbergStore(StoreBase):
|
||||
name = 'Project Gutenberg'
|
||||
description = _('The first producer of free ebooks.')
|
||||
actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
|
||||
|
||||
class StoreKoboStore(StoreBase):
|
||||
name = 'Kobo'
|
||||
description = _('eReading: anytime. anyplace.')
|
||||
actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
|
||||
|
||||
class StoreManyBooksStore(StoreBase):
|
||||
name = 'ManyBooks'
|
||||
description = _('The best ebooks at the best price: free!')
|
||||
actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
|
||||
|
||||
class StoreMobileReadStore(StoreBase):
|
||||
name = 'MobileRead'
|
||||
description = _('Ebooks handcrafted with the utmost care')
|
||||
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'
|
||||
|
||||
class StoreOpenLibraryStore(StoreBase):
|
||||
name = 'Open Library'
|
||||
description = _('One web page for every book.')
|
||||
actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
|
||||
|
||||
class StoreSmashwordsStore(StoreBase):
|
||||
name = 'Smashwords'
|
||||
description = _('Your ebook. Your way.')
|
||||
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
|
||||
|
||||
class StoreWaterstonesUKStore(StoreBase):
|
||||
name = 'Waterstones UK'
|
||||
description = _('Feel every word')
|
||||
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
|
||||
drm_free_only = False
|
||||
headquarters = 'FR'
|
||||
formats = ['EPUB', 'MOBI', 'PDF']
|
||||
|
||||
class StoreFoylesUKStore(StoreBase):
|
||||
name = 'Foyles UK'
|
||||
description = _('Foyles of London, online')
|
||||
author = 'Charles Haley'
|
||||
description = u'Foyles of London\'s ebook store. Provides extensive range covering all subjects.'
|
||||
actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
|
||||
|
||||
class AmazonDEKindleStore(StoreBase):
|
||||
name = 'Amazon DE Kindle'
|
||||
description = _('Kindle eBooks')
|
||||
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
|
||||
drm_free_only = False
|
||||
headquarters = 'UK'
|
||||
formats = ['EPUB', 'PDF']
|
||||
|
||||
plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
|
||||
StoreBaenWebScriptionStore, StoreBNStore,
|
||||
StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
|
||||
StoreEHarlequinStoretore, StoreFeedbooksStore,
|
||||
StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
|
||||
StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
|
||||
StoreWaterstonesUKStore]
|
||||
class StoreGandalfStore(StoreBase):
|
||||
name = 'Gandalf'
|
||||
author = u'Tomasz Długosz'
|
||||
description = u'Księgarnia internetowa Gandalf.'
|
||||
actual_plugin = 'calibre.gui2.store.gandalf_plugin:GandalfStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'PL'
|
||||
formats = ['EPUB', 'PDF']
|
||||
|
||||
class StoreGoogleBooksStore(StoreBase):
|
||||
name = 'Google Books'
|
||||
description = u'Google Books'
|
||||
actual_plugin = 'calibre.gui2.store.google_books_plugin:GoogleBooksStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'PDF', 'TXT']
|
||||
|
||||
class StoreGutenbergStore(StoreBase):
|
||||
name = 'Project Gutenberg'
|
||||
description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.'
|
||||
actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'HTML', 'MOBI', 'PDB', 'TXT']
|
||||
|
||||
class StoreKoboStore(StoreBase):
|
||||
name = 'Kobo'
|
||||
description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!'
|
||||
actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'CA'
|
||||
formats = ['EPUB']
|
||||
|
||||
class StoreLegimiStore(StoreBase):
|
||||
name = 'Legimi'
|
||||
author = u'Tomasz Długosz'
|
||||
description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
|
||||
actual_plugin = 'calibre.gui2.store.legimi_plugin:LegimiStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'PL'
|
||||
formats = ['EPUB']
|
||||
|
||||
class StoreManyBooksStore(StoreBase):
|
||||
name = 'ManyBooks'
|
||||
description = u'Public domain and creative commons works from many sources.'
|
||||
actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'FB2', 'JAR', 'LIT', 'LRF', 'MOBI', 'PDB', 'PDF', 'RB', 'RTF', 'TCR', 'TXT', 'ZIP']
|
||||
|
||||
class StoreMobileReadStore(StoreBase):
|
||||
name = 'MobileRead'
|
||||
description = u'Ebooks handcrafted with the utmost care.'
|
||||
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'CH'
|
||||
formats = ['EPUB', 'IMP', 'LRF', 'LIT', 'MOBI', 'PDF']
|
||||
|
||||
class StoreNextoStore(StoreBase):
|
||||
name = 'Nexto'
|
||||
author = u'Tomasz Długosz'
|
||||
description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.'
|
||||
actual_plugin = 'calibre.gui2.store.nexto_plugin:NextoStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'PL'
|
||||
formats = ['EPUB', 'PDF']
|
||||
|
||||
class StoreOpenLibraryStore(StoreBase):
|
||||
name = 'Open Library'
|
||||
description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
|
||||
actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
|
||||
|
||||
class StoreOReillyStore(StoreBase):
|
||||
name = 'OReilly'
|
||||
description = u'Programming and tech ebooks from OReilly.'
|
||||
actual_plugin = 'calibre.gui2.store.oreilly_plugin:OReillyStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['APK', 'DAISY', 'EPUB', 'MOBI', 'PDF']
|
||||
|
||||
class StorePragmaticBookshelfStore(StoreBase):
|
||||
name = 'Pragmatic Bookshelf'
|
||||
description = u'The Pragmatic Bookshelf\'s collection of programming and tech books avaliable as ebooks.'
|
||||
actual_plugin = 'calibre.gui2.store.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'MOBI', 'PDF']
|
||||
|
||||
class StoreSmashwordsStore(StoreBase):
|
||||
name = 'Smashwords'
|
||||
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
|
||||
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'HTML', 'LRF', 'MOBI', 'PDB', 'RTF', 'TXT']
|
||||
|
||||
class StoreWaterstonesUKStore(StoreBase):
|
||||
name = 'Waterstones UK'
|
||||
author = 'Charles Haley'
|
||||
description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.'
|
||||
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'UK'
|
||||
formats = ['EPUB', 'PDF']
|
||||
|
||||
class StoreWeightlessBooksStore(StoreBase):
|
||||
name = 'Weightless Books'
|
||||
description = u'An independent DRM-free ebooksite devoted to ebooks of all sorts.'
|
||||
actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'US'
|
||||
formats = ['EPUB', 'HTML', 'LIT', 'MOBI', 'PDF']
|
||||
|
||||
class StoreWizardsTowerBooksStore(StoreBase):
|
||||
name = 'Wizards Tower Books'
|
||||
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
|
||||
actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'UK'
|
||||
formats = ['EPUB', 'MOBI']
|
||||
|
||||
class StoreWoblinkStore(StoreBase):
|
||||
name = 'Woblink'
|
||||
author = 'Tomasz Długosz'
|
||||
description = u'Czytanie zdarza się wszędzie!'
|
||||
actual_plugin = 'calibre.gui2.store.woblink_plugin:WoblinkStore'
|
||||
|
||||
drm_free_only = False
|
||||
headquarters = 'PL'
|
||||
formats = ['EPUB']
|
||||
|
||||
plugins += [
|
||||
StoreArchiveOrgStore,
|
||||
StoreAmazonKindleStore,
|
||||
StoreAmazonDEKindleStore,
|
||||
StoreAmazonUKKindleStore,
|
||||
StoreBaenWebScriptionStore,
|
||||
StoreBNStore,
|
||||
StoreBeamEBooksDEStore,
|
||||
StoreBeWriteStore,
|
||||
StoreDieselEbooksStore,
|
||||
StoreEbookscomStore,
|
||||
StoreEPubBuyDEStore,
|
||||
StoreEHarlequinStore,
|
||||
StoreFeedbooksStore,
|
||||
StoreFoylesUKStore,
|
||||
StoreGandalfStore,
|
||||
StoreGoogleBooksStore,
|
||||
StoreGutenbergStore,
|
||||
StoreKoboStore,
|
||||
StoreLegimiStore,
|
||||
StoreManyBooksStore,
|
||||
StoreMobileReadStore,
|
||||
StoreNextoStore,
|
||||
StoreOpenLibraryStore,
|
||||
StoreOReillyStore,
|
||||
StorePragmaticBookshelfStore,
|
||||
StoreSmashwordsStore,
|
||||
StoreWaterstonesUKStore,
|
||||
StoreWeightlessBooksStore,
|
||||
StoreWizardsTowerBooksStore,
|
||||
StoreWoblinkStore
|
||||
]
|
||||
|
||||
# }}}
|
||||
|
@ -253,7 +253,7 @@ class OutputProfile(Plugin):
|
||||
periodical_date_in_title = True
|
||||
|
||||
#: Characters used in jackets and catalogs
|
||||
missing_char = u'x'
|
||||
missing_char = u'x'
|
||||
ratings_char = u'*'
|
||||
empty_ratings_char = u' '
|
||||
read_char = u'+'
|
||||
@ -293,38 +293,38 @@ class iPadOutput(OutputProfile):
|
||||
}
|
||||
]
|
||||
|
||||
missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
|
||||
ratings_char = u'\u2605' # filled star
|
||||
empty_ratings_char = u'\u2606' # hollow star
|
||||
read_char = u'\u2713' # check mark
|
||||
missing_char = u'\u2715\u200a' # stylized 'x' plus hair space
|
||||
ratings_char = u'\u2605' # filled star
|
||||
empty_ratings_char = u'\u2606' # hollow star
|
||||
read_char = u'\u2713' # check mark
|
||||
|
||||
touchscreen = True
|
||||
# touchscreen_news_css {{{
|
||||
touchscreen_news_css = u'''
|
||||
/* hr used in articles */
|
||||
.article_articles_list {
|
||||
/* hr used in articles */
|
||||
.article_articles_list {
|
||||
width:18%;
|
||||
}
|
||||
}
|
||||
.article_link {
|
||||
color: #593f29;
|
||||
color: #593f29;
|
||||
font-style: italic;
|
||||
}
|
||||
.article_next {
|
||||
-webkit-border-top-right-radius:4px;
|
||||
-webkit-border-bottom-right-radius:4px;
|
||||
-webkit-border-top-right-radius:4px;
|
||||
-webkit-border-bottom-right-radius:4px;
|
||||
font-style: italic;
|
||||
width:32%;
|
||||
}
|
||||
|
||||
.article_prev {
|
||||
-webkit-border-top-left-radius:4px;
|
||||
-webkit-border-bottom-left-radius:4px;
|
||||
-webkit-border-top-left-radius:4px;
|
||||
-webkit-border-bottom-left-radius:4px;
|
||||
font-style: italic;
|
||||
width:32%;
|
||||
}
|
||||
.article_sections_list {
|
||||
.article_sections_list {
|
||||
width:18%;
|
||||
}
|
||||
}
|
||||
.articles_link {
|
||||
font-weight: bold;
|
||||
}
|
||||
@ -334,8 +334,8 @@ class iPadOutput(OutputProfile):
|
||||
|
||||
|
||||
.caption_divider {
|
||||
border:#ccc 1px solid;
|
||||
}
|
||||
border:#ccc 1px solid;
|
||||
}
|
||||
|
||||
.touchscreen_navbar {
|
||||
background:#c3bab2;
|
||||
@ -357,50 +357,50 @@ class iPadOutput(OutputProfile):
|
||||
text-align:center;
|
||||
}
|
||||
|
||||
.touchscreen_navbar td a:link {
|
||||
color: #593f29;
|
||||
text-decoration: none;
|
||||
}
|
||||
.touchscreen_navbar td a:link {
|
||||
color: #593f29;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* Index formatting */
|
||||
.publish_date {
|
||||
text-align:center;
|
||||
}
|
||||
.divider {
|
||||
border-bottom:1em solid white;
|
||||
border-top:1px solid gray;
|
||||
}
|
||||
/* Index formatting */
|
||||
.publish_date {
|
||||
text-align:center;
|
||||
}
|
||||
.divider {
|
||||
border-bottom:1em solid white;
|
||||
border-top:1px solid gray;
|
||||
}
|
||||
|
||||
hr.caption_divider {
|
||||
border-color:black;
|
||||
border-style:solid;
|
||||
border-width:1px;
|
||||
}
|
||||
hr.caption_divider {
|
||||
border-color:black;
|
||||
border-style:solid;
|
||||
border-width:1px;
|
||||
}
|
||||
|
||||
/* Feed summary formatting */
|
||||
.article_summary {
|
||||
display:inline-block;
|
||||
}
|
||||
display:inline-block;
|
||||
}
|
||||
.feed {
|
||||
font-family:sans-serif;
|
||||
font-weight:bold;
|
||||
font-size:larger;
|
||||
}
|
||||
}
|
||||
|
||||
.feed_link {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.feed_next {
|
||||
-webkit-border-top-right-radius:4px;
|
||||
-webkit-border-bottom-right-radius:4px;
|
||||
-webkit-border-top-right-radius:4px;
|
||||
-webkit-border-bottom-right-radius:4px;
|
||||
font-style: italic;
|
||||
width:40%;
|
||||
}
|
||||
|
||||
.feed_prev {
|
||||
-webkit-border-top-left-radius:4px;
|
||||
-webkit-border-bottom-left-radius:4px;
|
||||
-webkit-border-top-left-radius:4px;
|
||||
-webkit-border-bottom-left-radius:4px;
|
||||
font-style: italic;
|
||||
width:40%;
|
||||
}
|
||||
@ -410,24 +410,24 @@ class iPadOutput(OutputProfile):
|
||||
font-size: 160%;
|
||||
}
|
||||
|
||||
.feed_up {
|
||||
.feed_up {
|
||||
font-weight: bold;
|
||||
width:20%;
|
||||
}
|
||||
}
|
||||
|
||||
.summary_headline {
|
||||
font-weight:bold;
|
||||
text-align:left;
|
||||
}
|
||||
}
|
||||
|
||||
.summary_byline {
|
||||
text-align:left;
|
||||
font-family:monospace;
|
||||
}
|
||||
}
|
||||
|
||||
.summary_text {
|
||||
text-align:left;
|
||||
}
|
||||
}
|
||||
|
||||
'''
|
||||
# }}}
|
||||
@ -617,8 +617,8 @@ class KindleOutput(OutputProfile):
|
||||
supports_mobi_indexing = True
|
||||
periodical_date_in_title = False
|
||||
|
||||
missing_char = u'x\u2009'
|
||||
empty_ratings_char = u'\u2606'
|
||||
missing_char = u'x\u2009'
|
||||
empty_ratings_char = u'\u2606'
|
||||
ratings_char = u'\u2605'
|
||||
read_char = u'\u2713'
|
||||
|
||||
@ -642,8 +642,8 @@ class KindleDXOutput(OutputProfile):
|
||||
#comic_screen_size = (741, 1022)
|
||||
supports_mobi_indexing = True
|
||||
periodical_date_in_title = False
|
||||
missing_char = u'x\u2009'
|
||||
empty_ratings_char = u'\u2606'
|
||||
missing_char = u'x\u2009'
|
||||
empty_ratings_char = u'\u2606'
|
||||
ratings_char = u'\u2605'
|
||||
read_char = u'\u2713'
|
||||
mobi_ems_per_blockquote = 2.0
|
||||
|
@ -92,7 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
|
||||
config['enabled_plugins'] = ep
|
||||
|
||||
default_disabled_plugins = set([
|
||||
'Overdrive',
|
||||
'Overdrive', 'Douban Books',
|
||||
])
|
||||
|
||||
def is_disabled(plugin):
|
||||
@ -216,9 +216,26 @@ def store_plugins():
|
||||
customization = config['plugin_customization']
|
||||
for plugin in _initialized_plugins:
|
||||
if isinstance(plugin, Store):
|
||||
if not is_disabled(plugin):
|
||||
plugin.site_customization = customization.get(plugin.name, '')
|
||||
yield plugin
|
||||
plugin.site_customization = customization.get(plugin.name, '')
|
||||
yield plugin
|
||||
|
||||
def available_store_plugins():
|
||||
for plugin in store_plugins():
|
||||
if not is_disabled(plugin):
|
||||
yield plugin
|
||||
|
||||
def stores():
|
||||
stores = set([])
|
||||
for plugin in store_plugins():
|
||||
stores.add(plugin.name)
|
||||
return stores
|
||||
|
||||
def available_stores():
|
||||
stores = set([])
|
||||
for plugin in available_store_plugins():
|
||||
stores.add(plugin.name)
|
||||
return stores
|
||||
|
||||
# }}}
|
||||
|
||||
# Metadata read/write {{{
|
||||
|
@ -59,7 +59,7 @@ class ANDROID(USBMS):
|
||||
0x0489 : { 0xc001 : [0x0226], 0xc004 : [0x0226], },
|
||||
|
||||
# Acer
|
||||
0x502 : { 0x3203 : [0x0100]},
|
||||
0x502 : { 0x3203 : [0x0100, 0x224]},
|
||||
|
||||
# Dell
|
||||
0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},
|
||||
@ -109,7 +109,7 @@ class ANDROID(USBMS):
|
||||
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
|
||||
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
|
||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
|
||||
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']
|
||||
|
@ -203,9 +203,11 @@ class ITUNES(DriverBase):
|
||||
# 0x1294 iPhone 3GS
|
||||
# 0x1297 iPhone 4
|
||||
# 0x129a iPad
|
||||
# 0x12a2 iPad2
|
||||
# 0x129f iPad2 (WiFi)
|
||||
# 0x12a2 iPad2 (GSM)
|
||||
# 0x12a3 iPad2 (CDMA)
|
||||
VENDOR_ID = [0x05ac]
|
||||
PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x12a2]
|
||||
PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a2,0x12a3]
|
||||
BCD = [0x01]
|
||||
|
||||
# Plugboard ID
|
||||
@ -939,7 +941,7 @@ class ITUNES(DriverBase):
|
||||
# declared in use_plugboard_ext and a device name of ITUNES
|
||||
if DEBUG:
|
||||
self.log.info("ITUNES.set_plugboard()")
|
||||
#self.log.info(' using plugboard %s' % plugboards)
|
||||
#self.log.info(' plugboard: %s' % plugboards)
|
||||
self.plugboards = plugboards
|
||||
self.plugboard_func = pb_func
|
||||
|
||||
@ -1050,7 +1052,6 @@ class ITUNES(DriverBase):
|
||||
'title': metadata[i].title,
|
||||
'uuid': metadata[i].uuid }
|
||||
|
||||
|
||||
# Report progress
|
||||
if self.report_progress is not None:
|
||||
self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))
|
||||
@ -2742,7 +2743,7 @@ class ITUNES(DriverBase):
|
||||
# Update metadata from plugboard
|
||||
# If self.plugboard is None (no transforms), original metadata is returned intact
|
||||
metadata_x = self._xform_metadata_via_plugboard(metadata, this_book.format)
|
||||
|
||||
self.log("metadata.title_sort: %s metadata_x.title_sort: %s" % (metadata.title_sort, metadata_x.title_sort))
|
||||
if isosx:
|
||||
if lb_added:
|
||||
lb_added.name.set(metadata_x.title)
|
||||
@ -2752,8 +2753,7 @@ class ITUNES(DriverBase):
|
||||
lb_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
lb_added.enabled.set(True)
|
||||
lb_added.sort_artist.set(icu_title(metadata_x.author_sort))
|
||||
lb_added.sort_name.set(metadata.title_sort)
|
||||
|
||||
lb_added.sort_name.set(metadata_x.title_sort)
|
||||
|
||||
if db_added:
|
||||
db_added.name.set(metadata_x.title)
|
||||
@ -2763,7 +2763,7 @@ class ITUNES(DriverBase):
|
||||
db_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
db_added.enabled.set(True)
|
||||
db_added.sort_artist.set(icu_title(metadata_x.author_sort))
|
||||
db_added.sort_name.set(metadata.title_sort)
|
||||
db_added.sort_name.set(metadata_x.title_sort)
|
||||
|
||||
if metadata_x.comments:
|
||||
if lb_added:
|
||||
@ -2783,6 +2783,7 @@ class ITUNES(DriverBase):
|
||||
|
||||
# Set genre from series if available, else first alpha tag
|
||||
# Otherwise iTunes grabs the first dc:subject from the opf metadata
|
||||
# If title_sort applied in plugboard, that overrides using series/index as title_sort
|
||||
if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]:
|
||||
if DEBUG:
|
||||
self.log.info(" ITUNES._update_iTunes_metadata()")
|
||||
@ -2794,7 +2795,9 @@ class ITUNES(DriverBase):
|
||||
fraction = index-integer
|
||||
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
|
||||
if lb_added:
|
||||
lb_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
|
||||
# If no title_sort plugboard tweak, create sort_name from series/index
|
||||
if metadata.title_sort == metadata_x.title_sort:
|
||||
lb_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
|
||||
lb_added.episode_ID.set(metadata_x.series)
|
||||
lb_added.episode_number.set(metadata_x.series_index)
|
||||
|
||||
@ -2808,7 +2811,9 @@ class ITUNES(DriverBase):
|
||||
break
|
||||
|
||||
if db_added:
|
||||
db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
|
||||
# If no title_sort plugboard tweak, create sort_name from series/index
|
||||
if metadata.title_sort == metadata_x.title_sort:
|
||||
db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
|
||||
db_added.episode_ID.set(metadata_x.series)
|
||||
db_added.episode_number.set(metadata_x.series_index)
|
||||
|
||||
@ -2843,7 +2848,7 @@ class ITUNES(DriverBase):
|
||||
lb_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
lb_added.Enabled = True
|
||||
lb_added.SortArtist = icu_title(metadata_x.author_sort)
|
||||
lb_added.SortName = metadata.title_sort
|
||||
lb_added.SortName = metadata_x.title_sort
|
||||
|
||||
if db_added:
|
||||
db_added.Name = metadata_x.title
|
||||
@ -2853,7 +2858,7 @@ class ITUNES(DriverBase):
|
||||
db_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
|
||||
db_added.Enabled = True
|
||||
db_added.SortArtist = icu_title(metadata_x.author_sort)
|
||||
db_added.SortName = metadata.title_sort
|
||||
db_added.SortName = metadata_x.title_sort
|
||||
|
||||
if metadata_x.comments:
|
||||
if lb_added:
|
||||
@ -2886,7 +2891,9 @@ class ITUNES(DriverBase):
|
||||
fraction = index-integer
|
||||
series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
|
||||
if lb_added:
|
||||
lb_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
|
||||
# If no title_sort plugboard tweak, create sort_name from series/index
|
||||
if metadata.title_sort == metadata_x.title_sort:
|
||||
lb_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
|
||||
lb_added.EpisodeID = metadata_x.series
|
||||
|
||||
try:
|
||||
@ -2912,7 +2919,9 @@ class ITUNES(DriverBase):
|
||||
break
|
||||
|
||||
if db_added:
|
||||
db_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
|
||||
# If no title_sort plugboard tweak, create sort_name from series/index
|
||||
if metadata.title_sort == metadata_x.title_sort:
|
||||
db_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
|
||||
db_added.EpisodeID = metadata_x.series
|
||||
|
||||
try:
|
||||
@ -2973,6 +2982,9 @@ class ITUNES(DriverBase):
|
||||
newmi.publisher if book.publisher != newmi.publisher else ''))
|
||||
self.log.info(" tags: %s %s" % (book.tags, ">>> %s" %
|
||||
newmi.tags if book.tags != newmi.tags else ''))
|
||||
else:
|
||||
self.log(" matching plugboard not found")
|
||||
|
||||
else:
|
||||
newmi = book
|
||||
return newmi
|
||||
|
@ -95,9 +95,8 @@ class POCKETBOOK360(EB600):
|
||||
|
||||
FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']
|
||||
|
||||
VENDOR_NAME = 'PHILIPS'
|
||||
WINDOWS_MAIN_MEM = 'MASS_STORGE'
|
||||
WINDOWS_CARD_A_MEM = 'MASS_STORGE'
|
||||
VENDOR_NAME = ['PHILIPS', '__POCKET']
|
||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['MASS_STORGE', 'BOOK_USB_STORAGE']
|
||||
|
||||
OSX_MAIN_MEM = 'Philips Mass Storge Media'
|
||||
OSX_CARD_A_MEM = 'Philips Mass Storge Media'
|
||||
|
@ -38,7 +38,7 @@ class KOBO(USBMS):
|
||||
|
||||
VENDOR_ID = [0x2237]
|
||||
PRODUCT_ID = [0x4161]
|
||||
BCD = [0x0110]
|
||||
BCD = [0x0110, 0x0323]
|
||||
|
||||
VENDOR_NAME = ['KOBO_INC', 'KOBO']
|
||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['.KOBOEREADER', 'EREADER']
|
||||
|
@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
|
||||
c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
|
||||
from errno import EBUSY, ENOMEM
|
||||
|
||||
from calibre import iswindows, isosx, isfreebsd, load_library
|
||||
from calibre import iswindows, isosx, isbsd, load_library
|
||||
|
||||
_libusb_name = 'libusb'
|
||||
PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
|
||||
PATH_MAX = 511 if iswindows else 1024 if (isosx or isbsd) else 4096
|
||||
if iswindows:
|
||||
class Structure(_Structure):
|
||||
_pack_ = 1
|
||||
|
@ -269,8 +269,8 @@ class NEXTBOOK(USBMS):
|
||||
|
||||
EBOOK_DIR_MAIN = ''
|
||||
|
||||
VENDOR_NAME = 'NEXT2'
|
||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14'
|
||||
VENDOR_NAME = ['NEXT2', 'BK7005']
|
||||
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['1.0.14', 'PLAYER']
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
THUMBNAIL_HEIGHT = 120
|
||||
|
||||
|
@ -926,8 +926,8 @@ class Device(DeviceConfig, DevicePlugin):
|
||||
if not isinstance(template, unicode):
|
||||
template = template.decode('utf-8')
|
||||
app_id = str(getattr(mdata, 'application_id', ''))
|
||||
# The db id will be in the created filename
|
||||
extra_components = get_components(template, mdata, fname,
|
||||
id_ = mdata.get('id', fname)
|
||||
extra_components = get_components(template, mdata, id_,
|
||||
timefmt=opts.send_timefmt, length=maxlen-len(app_id)-1)
|
||||
if not extra_components:
|
||||
extra_components.append(sanitize(self.filename_callback(fname,
|
||||
|
@ -68,7 +68,8 @@ def check_command_line_options(parser, args, log):
|
||||
raise SystemExit(1)
|
||||
|
||||
output = args[2]
|
||||
if output.startswith('.') and output != '.':
|
||||
if output.startswith('.') and (output != '.' and not
|
||||
output.startswith('..')):
|
||||
output = os.path.splitext(os.path.basename(input))[0]+output
|
||||
output = os.path.abspath(output)
|
||||
|
||||
|
@ -103,10 +103,11 @@ class EPUBInput(InputFormatPlugin):
|
||||
t.set('href', guide_cover)
|
||||
t.set('title', 'Title Page')
|
||||
from calibre.ebooks import render_html_svg_workaround
|
||||
renderer = render_html_svg_workaround(guide_cover, log)
|
||||
if renderer is not None:
|
||||
open('calibre_raster_cover.jpg', 'wb').write(
|
||||
renderer)
|
||||
if os.path.exists(guide_cover):
|
||||
renderer = render_html_svg_workaround(guide_cover, log)
|
||||
if renderer is not None:
|
||||
open('calibre_raster_cover.jpg', 'wb').write(
|
||||
renderer)
|
||||
|
||||
def find_opf(self):
|
||||
def attr(n, attr):
|
||||
|
@ -413,6 +413,13 @@ class EPUBOutput(OutputFormatPlugin):
|
||||
rule.style.removeProperty('margin-left')
|
||||
# padding-left breaks rendering in webkit and gecko
|
||||
rule.style.removeProperty('padding-left')
|
||||
# Change whitespace:pre to pre-line to accommodate readers that
|
||||
# cannot scroll horizontally
|
||||
for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
|
||||
style = rule.style
|
||||
ws = style.getPropertyValue('white-space')
|
||||
if ws == 'pre':
|
||||
style.setProperty('white-space', 'pre-wrap')
|
||||
|
||||
# }}}
|
||||
|
||||
|
@ -20,7 +20,7 @@ from itertools import izip
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.constants import islinux, isfreebsd, iswindows
|
||||
from calibre.constants import islinux, isbsd, iswindows
|
||||
from calibre import unicode_path, as_unicode
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
@ -302,7 +302,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
if getattr(self, '_is_case_sensitive', None) is not None:
|
||||
return self._is_case_sensitive
|
||||
if not path or not os.path.exists(path):
|
||||
return islinux or isfreebsd
|
||||
return islinux or isbsd
|
||||
self._is_case_sensitive = not (os.path.exists(path.lower()) \
|
||||
and os.path.exists(path.upper()))
|
||||
return self._is_case_sensitive
|
||||
|
@ -7,7 +7,6 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
import posixpath
|
||||
|
||||
from calibre import guess_type, walk
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
@ -74,22 +73,23 @@ class HTMLZInput(InputFormatPlugin):
|
||||
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
|
||||
|
||||
# Get the cover path from the OPF.
|
||||
cover_href = None
|
||||
cover_path = None
|
||||
opf = None
|
||||
for x in walk('.'):
|
||||
if os.path.splitext(x)[1].lower() in ('.opf'):
|
||||
opf = x
|
||||
break
|
||||
if opf:
|
||||
opf = OPF(opf)
|
||||
cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
|
||||
opf = OPF(opf, basedir=os.getcwd())
|
||||
cover_path = opf.raster_cover
|
||||
# Set the cover.
|
||||
if cover_href:
|
||||
if cover_path:
|
||||
cdata = None
|
||||
with open(cover_href, 'rb') as cf:
|
||||
with open(os.path.join(os.getcwd(), cover_path), 'rb') as cf:
|
||||
cdata = cf.read()
|
||||
id, href = oeb.manifest.generate('cover', cover_href)
|
||||
oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
|
||||
cover_name = os.path.basename(cover_path)
|
||||
id, href = oeb.manifest.generate('cover', cover_name)
|
||||
oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata)
|
||||
oeb.guide.add('cover', 'Cover', href)
|
||||
|
||||
return oeb
|
||||
|