[Sync] Sync with trunk. Revision 9312

commit e44bb85993
Li Fanxi, 2011-05-26 00:23:16 +08:00
287 changed files with 113448 additions and 104509 deletions


@@ -31,3 +31,4 @@ nbproject/
 .pydevproject
 .settings/
 *.DS_Store
+calibre_plugins/


@@ -19,6 +19,149 @@
 # new recipes:
 # - title:

+- version: 0.8.2
+  date: 2011-05-20
+
+  new features:
+    - title: "Various new ebook sources added to Get Books: Google Books, O'Reilly, archive.org, some Polish ebook stores, etc."
+
+    - title: "Amazon metadata download: Allow user to configure Amazon plugin to use any of the US, UK, German, French and Italian Amazon websites"
+
+    - title: "When deleting large numbers of books, give the user the option to skip the Recycle Bin, since sending lots of files to the recycle bin can be very slow."
+      tickets: [784987]
+
+    - title: "OS X: The unified title+toolbar was disabled as it had various bugs. If you really want it you can turn it on again via Preferences->Tweaks, but be aware that you will see problems like the calibre window being too wide, weird animations when a device is detected, etc."
+
+    - title: "Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name."
+
+    - title: "Get Books: Store last few searches in history"
+
+  bug fixes:
+    - title: "Fix a crash when a device is connected/disconnected while a modal dialog opened from the toolbar is visible"
+      tickets: [780484]
+
+    - title: "Fix incorrect results from ebooks.com when searching via Get Books"
+
+    - title: "Metadata plugboards: Add prioritization scheme to allow for using different settings for different locations"
+      tickets: [783229]
+
+    - title: "Fix manage authors dialog too wide"
+      tickets: [783065]
+
+    - title: "Fix multiple bracket types in author names not handled correctly when generating author sort string"
+      tickets: [782551]
+
+    - title: "MOBI Input: Don't error out when detecting TOC structure if one of the elements has an invalid margin unit"
+
+    - title: "More fixes for Japanese language calibre on Windows"
+      tickets: [782408]
+
+    - title: "Linux binaries: Always use either Cleanlook or Plastique styles for the GUI if no style can be loaded from the host computer"
+
+  improved recipes:
+    - Newsweek
+    - Economist
+    - Dvhn
+    - United Daily
+    - Dagens Nyheter
+    - GoComics
+    - faz.net
+    - golem.de
+
+  new recipes:
+    - title: National Geographic
+      author: gagsays
+
+    - title: Various German news sources
+      author: schuster
+
+    - title: Dilema Veche
+      author: Silviu Cotoara
+
+    - title: "Glamour, Good to Know, Good Housekeeping and Men's Health"
+      author: Anonymous
+
+    - title: "Financial Sense and iProfessional"
+      author: Darko Miletic
+
+- version: 0.8.1
+  date: 2011-05-13
+
+  new features:
+    - title: "Add Amazon DE, Beam EBooks, Beam DE, Weightless Books, Wizards Tower Books to the list of ebook stores searched by Get Books"
+
+    - title: "TXT output: All new Textile output with much greater preservation of formatting from the input document"
+
+    - title: "Migrate metadata plugin for Douban Books to the 0.8 API"
+
+    - title: "Driver for Dell Streak on Windows"
+
+    - title: "Add menu items to Get Books action to search by title and author of current book"
+
+    - title: "Add title_sort as available field to CSV/XML catalogs"
+
+    - title: "Add a context menu to the manage authors dialog"
+
+    - title: "Add a button to paste isbn into the identifiers field in the edit metadata dialog automatically"
+
+  bug fixes:
+    - title: "Amazon metadata download plugin: Fix links being stripped from comments. Also fix ratings/isbn not being parsed from kindle edition pages."
+      tickets: [782012]
+
+    - title: "Fix one source of segfaults on shutdown in the linux binary builds."
+
+    - title: "Allow the use of condensed/expanded fonts as interface fonts"
+
+    - title: "EPUB Input: Ignore missing cover file when converting, instead of erroring out."
+      tickets: [781848]
+
+    - title: "Fix custom identifier being erased by metadata download"
+      tickets: [781759]
+
+    - title: "Fix regression that broke various things when using Japanese language calibre on Windows"
+      tickets: [780804]
+
+    - title: "RTF Input: Handle null color codes correctly"
+      tickets: [780728]
+
+    - title: "ODT Input: Handle inline special styles defined on <text:span> tags."
+      tickets: [780250]
+
+    - title: "Fix error when pressing the next/previous button with an empty search in the Plugins preferences"
+      tickets: [781135]
+
+    - title: "Ignore 'Unknown' author when downloading metadata."
+      tickets: [779348]
+
+    - title: "Fix timezone bug when setting dates in the edit metadata dialog"
+      tickets: [779497]
+
+    - title: "Fix ebook-convert not recognizing output paths starting with .."
+      tickets: [779322]
+
+  improved recipes:
+    - "Strategy+Business"
+    - Readers Digest
+    - Ming Pao
+    - Telepolis
+    - Fronda
+    - Rzeczpospolita
+
+  new recipes:
+    - title: "Various Taiwanese news sources"
+      author: Eddie Lau
+
+    - title: Replica Vedetelor, Ziua Veche
+      author: Silviu Cotoara
+
+    - title: Welt der Physik
+      author: schuster
+
+    - title: Korea Herald
+      author: Seongkyoun Yoo
+
 - version: 0.8.0
   date: 2010-05-06

View File

@@ -93,7 +93,7 @@ class Arcamax(BasicNewsRecipe):
             for page in pages:
                 page_soup = self.index_to_soup(url)
                 if page_soup:
-                    title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
+                    title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
                     page_url = url
                     # orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
                     prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
@@ -127,4 +127,3 @@ class Arcamax(BasicNewsRecipe):
         p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
         body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
     '''
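A note on the fix above: h1.contents[0] is only the first child node of the <h1>, which may itself be a tag rather than plain text. Wrapping it in tag_to_string (a BasicNewsRecipe helper, callable on the class) flattens either case to text. A minimal sketch, assuming calibre's bundled Python 2 era BeautifulSoup and a hypothetical header fragment:

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe

# Hypothetical markup, for illustration only
soup = BeautifulSoup('<div class="comics-header"><h1><b>Garfield</b> and Friends</h1></div>')
h1 = soup.find('div', attrs={'class':'comics-header'}).h1
# h1.contents[0] is the <b> Tag, not a string; tag_to_string flattens the whole header
print BasicNewsRecipe.tag_to_string(h1)  # -> u'Garfield and Friends'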

recipes/bild_de.recipe (new file, 46 lines)

@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Bild.de'
    __author__ = 'schuster'
    oldest_article = 1
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True

    # get cover from myspace
    cover_url = 'http://a3.l3-images.myspacecdn.com/images02/56/0232f842170b4d349779f8379c27e073/l.jpg'

    # set what to fetch on the site
    remove_tags_before = dict(name='h2', attrs={'id':'cover'})
    remove_tags_after = dict(name='div', attrs={'class':'back'})

    # thanx to kiklop74 for code (see sticky thread -> Recipes - Re-usable code)
    # this one removes a lot of direct links
    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    # remove the ads
    filter_regexps = [r'.\.smartadserver\.com']
    def skip_ad_pages(self, soup):
        return None

    # get the real url behind .feedsportal.com and fetch the articles
    def get_article_url(self, article):
        return article.get('id', article.get('guid', None))

    # list of the rss sources from www.bild.de
    feeds = [(u'Überblick', u'http://rss.bild.de/bild.xml'),
             (u'News', u'http://rss.bild.de/bild-news.xml'),
             (u'Politik', u'http://rss.bild.de/bild-politik.xml'),
             (u'Unterhaltung', u'http://rss.bild.de/bild-unterhaltung.xml'),
             (u'Sport', u'http://rss.bild.de/bild-sport.xml'),
             (u'Lifestyle', u'http://rss.bild.de/bild-lifestyle.xml'),
             (u'Ratgeber', u'http://rss.bild.de/bild-ratgeber.xml')
             ]
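A note on get_article_url above: the Bild feeds route article links through feedsportal.com redirects, but the feed entries also carry the direct URL in their id/guid fields, which calibre exposes through article.get(). A sketch of the fallback chain, with hypothetical entry data:

# Hypothetical feed entry, for illustration only
article = {'link': 'http://rss.feedsportal.com/c/redirect-target',
           'guid': 'http://www.bild.de/some-article.html'}
print article.get('id', article.get('guid', None))
# -> the direct bild.de URL, since this entry has no 'id'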


@@ -0,0 +1,33 @@
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Börse-online'
    __author__ = 'schuster'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    remove_javascript = True
    cover_url = 'http://www.dpv.de/images/1995/source.gif'
    masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg'
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
    remove_tags_bevor = [dict(name='h3')]
    remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]
    remove_tags = [dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}),
                   dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']),
                   dict(name=['h2', 'Gesamtranking', 'h3',''])]

    def print_version(self, url):
        return url.replace('.html#nv=rss', '.html?mode=print')

    feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')]

recipes/capital_de.recipe (new file, 61 lines)

@@ -0,0 +1,61 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305470859(BasicNewsRecipe):
    title = u'Capital.de'
    language = 'de'
    __author__ = 'schuster'
    oldest_article = 7
    max_articles_per_feed = 35
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg'
    cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg'
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        return url.replace('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print')

    remove_tags_bevor = [dict(name='td', attrs={'class':'textcell'})]
    remove_tags_after = [dict(name='div', attrs={'class':'artikelsplit'})]

    feeds = [(u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'),
             (u'Unternehmen', u'http://www.capital.de/rss/unternehmen'),
             (u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage')]

    def append_page(self, soup, appendtag, position):
        pager = soup.find('div', attrs={'class':'artikelsplit'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':'printable'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2, texttag, newpos)
            texttag.extract()
            appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('div', attrs={'class':'artikelsplit'}):
            item.extract()
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div', attrs={'class':'artikelsplit'})
        if pager:
            pager.extract()
        return self.adeify_images(soup)

    remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'artikelsplit']}),
                   dict(id=['topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere']),
                   dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
                   dict(rel=['canonical'])]
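For reference, append_page/preprocess_html above follow the usual calibre idiom for stitching split articles: chase the 'artikelsplit' pager recursively, pull the printable block out of each follow-up page, and graft it into the first page's soup. The same flow, condensed and commented (a sketch, not a drop-in replacement; note the recipe as committed never sets self.INDEX, which this pattern relies on):

def append_page(self, soup, appendtag, position):
    pager = soup.find('div', attrs={'class':'artikelsplit'})      # "next page" link
    if pager:
        soup2 = self.index_to_soup(self.INDEX + pager.a['href'])  # fetch the next page
        texttag = soup2.find('div', attrs={'class':'printable'})  # its article body
        self.append_page(soup2, texttag, len(texttag.contents))   # recurse depth-first
        texttag.extract()                    # detach the body from its own page...
        appendtag.insert(position, texttag)  # ...and graft it onto page one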


@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'中時電子報'
    oldest_article = 1
    max_articles_per_feed = 100

    feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),
             (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),
             (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),
             (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),
             (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),
             (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),
             (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),
             (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),
             (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),
             (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),
             #(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'),
             #(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links
             #(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links
             ]

    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'China Times Group'
    description = 'China Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})]
    remove_tags = [dict(name='div', attrs={'class':['focus-news']})]


@@ -0,0 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305567197(BasicNewsRecipe):
    title = u'Cosmopolitan.de'
    __author__ = 'schuster'
    oldest_article = 7
    language = 'de'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    cover_url = 'http://www.cosmopolitan.com/cm/shared/site_images/print_this/cosmopolitan_logo.gif'
    remove_tags_before = dict(name='h1', attrs={'class':'artikel'})
    remove_tags_after = dict(name='div', attrs={'class':'morePages'})
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
    '''
    remove_tags = [dict(id='strong'),
        dict(title='strong'),
        dict(name='span'),
        dict(name='li', attrs={'class':'large'}),
        dict(name='ul', attrs={'class':'articleImagesPortrait clearfix'}),
        dict(name='p', attrs={'class':'external'}),
        dict(name='a', attrs={'target':'_blank'}),]

    feeds = [(u'Komplett', u'http://www.cosmopolitan.de/rss/allgemein.xml'),
        (u'Mode', u'http://www.cosmopolitan.de/rss/mode.xml'),
        (u'Beauty', u'http://www.cosmopolitan.de/rss/beauty.xml'),
        (u'Liebe&Sex', u'http://www.cosmopolitan.de/rss/liebe.xml'),
        (u'Psychologie', u'http://www.cosmopolitan.de/rss/psychologie.xml'),
        (u'Job&Karriere', u'http://www.cosmopolitan.de/rss/job.xml'),
        (u'Lifestyle', u'http://www.cosmopolitan.de/rss/lifestyle.xml'),
        (u'Shopping', u'http://www.cosmopolitan.de/rss/shopping.xml'),
        (u'Bildergalerien', u'http://www.cosmopolitan.de/rss/bildgalerien.xml')]


@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
dilemaveche.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DilemaVeche(BasicNewsRecipe):
    title = u'Dilema Veche'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Sunt vechi, domnule!'
    publisher = u'Dilema Veche'
    oldest_article = 50
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare'
    encoding = 'utf-8'
    cover_url = 'http://www.dilemaveche.ro/sites/all/themes/dilema/theme/dilema_two/layouter/dilema_two_homepage/logo.png'

    conversion_options = {
        'comments'  : description
        ,'tags'      : category
        ,'language'  : language
        ,'publisher' : publisher
    }

    keep_only_tags = [
        dict(name='h1', attrs={'class':'art_title'})
        , dict(name='h1', attrs={'class':'art_title online'})
        , dict(name='div', attrs={'class':'item'})
        , dict(name='div', attrs={'class':'art_content'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['article_details']})
        , dict(name='div', attrs={'class':['controale']})
        , dict(name='div', attrs={'class':['art_related_left']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['article_details']})
    ]

    feeds = [
        (u'Feeds', u'http://www.dilemaveche.ro/rss.xml')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

recipes/divahair.recipe (new file, 53 lines)

@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
divahair.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DivaHair(BasicNewsRecipe):
    title = u'Diva Hair'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Coafuri, frizuri, tunsori ..'
    publisher = u'Diva Hair'
    category = u'Ziare,Stiri,Coafuri,Femei'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.divahair.ro/imgs/logo.jpg'

    conversion_options = {
        'comments'  : description
        ,'tags'      : category
        ,'language'  : language
        ,'publisher' : publisher
    }

    keep_only_tags = [
        dict(name='td', attrs={'class':'spatiuart'})
        , dict(name='div', attrs={'class':'spatiuart'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class':'categorie'})
        , dict(name='div', attrs={'class':'gri gri2 detaliiart'})
        , dict(name='div', attrs={'class':'articol_box_bottom'})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':'articol_box_bottom'})
    ]

    feeds = [ (u'\u0218tiri', u'http://www.divahair.ro/feed') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)


@@ -37,7 +37,7 @@ class DN_se(BasicNewsRecipe):
             ,(u'Kultur' , u'http://www.dn.se/kultur-rss' )
             ]
-    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    keep_only_tags = [dict(name='div', attrs={'id':'article-content'})]
     remove_tags_before = dict(name='h1')
     remove_tags_after = dict(name='div',attrs={'id':'byline'})
     remove_tags = [
@@ -45,5 +45,5 @@ class DN_se(BasicNewsRecipe):
             ,dict(name='div',attrs={'id':'hook'})
             ]


@@ -1,19 +1,21 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1302341394(BasicNewsRecipe):
     title = u'DvhN'
-    oldest_article = 1
+    __author__ = 'Reijndert'
+    oldest_article = 7
     max_articles_per_feed = 200
-    __author__ = 'Reijndert'
     no_stylesheets = True
-    cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif'
+    cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg'
     language = 'nl'
     country = 'NL'
     version = 1
     publisher = u'Dagblad van het Noorden'
     category = u'Nieuws'
     description = u'Nieuws uit Noord Nederland'
+    timefmt = ' %Y-%m-%d (%a)'

     keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
@@ -21,11 +23,26 @@ class AdvancedUserRecipe1302341394(BasicNewsRecipe):
         ]

     remove_tags = [
-        dict(name=['object','link','iframe','base'])
+        dict(name='span',attrs={'class':'location'})
+        ,dict(name='span',attrs={'class':'copyright'})
     ]

-    feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')]
+    preprocess_regexps = [
+        (re.compile(r'<a.*?>'), lambda h1: '')
+        ,(re.compile(r'</a>'), lambda h2: '')
+        ,(re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: '')
+        ,(re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: '')
+    ]
+
+    feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss')
+        , (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss')
+        , (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss')
+        , (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss')
+        , (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss')
+        , (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss')
+        , (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss')
+        , (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')
+    ]

     extra_css = '''
         body {font-family: verdana, arial, helvetica, geneva, sans-serif;}


@@ -20,7 +20,7 @@ class Economist(BasicNewsRecipe):
     INDEX = 'http://www.economist.com/printedition'
     description = ('Global news and current affairs from a European'
             ' perspective. Best downloaded on Friday mornings (GMT)')
+    extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
     cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [


@@ -14,7 +14,7 @@ class Economist(BasicNewsRecipe):
     description = ('Global news and current affairs from a European'
             ' perspective. Best downloaded on Friday mornings (GMT).'
             ' Much slower than the print edition based version.')
+    extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
     cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [


@@ -1,6 +1,6 @@
 __license__ = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 elmundo.es
 '''
@@ -10,15 +10,24 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class ElMundo(BasicNewsRecipe):
     title = 'El Mundo'
     __author__ = 'Darko Miletic'
-    description = 'News from Spain'
-    publisher = 'El Mundo'
+    description = 'Lider de informacion en espaniol'
+    publisher = 'Unidad Editorial Informacion General S.L.U.'
     category = 'news, politics, Spain'
     oldest_article = 2
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'iso8859_15'
-    language = 'es'
+    language = 'es_ES'
+    masthead_url = 'http://estaticos03.elmundo.es/elmundo/iconos/v4.x/v4.01/bg_h1.png'
+    publication_type = 'newspaper'
+    extra_css = """
+        body{font-family: Arial,Helvetica,sans-serif}
+        .metadata_noticia{font-size: small}
+        h1,h2,h3,h4,h5,h6,.subtitulo {color: #3F5974}
+        .hora{color: red}
+        .update{color: gray}
+        """

     conversion_options = {
         'comments' : description
@@ -30,22 +39,31 @@ class ElMundo(BasicNewsRecipe):
     keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
     remove_tags_before = dict(attrs={'class':['titular','antetitulo'] })
     remove_tags_after = dict(name='div' , attrs={'id':['desarrollo_noticia','tamano']})
+    remove_attributes = ['lang','border']
     remove_tags = [
         dict(name='div', attrs={'class':['herramientas','publicidad_google']})
         ,dict(name='div', attrs={'id':'modulo_multimedia' })
         ,dict(name='ul', attrs={'class':'herramientas' })
-        ,dict(name=['object','link'])
+        ,dict(name=['object','link','embed','iframe','base','meta'])
     ]

     feeds = [
-        (u'Portada'          , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
-        ,(u'Deportes'         , u'http://rss.elmundo.es/rss/descarga.htm?data2=14')
-        ,(u'Economia'         , u'http://rss.elmundo.es/rss/descarga.htm?data2=7' )
-        ,(u'Espana'           , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
-        ,(u'Internacional'    , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
-        ,(u'Cultura'          , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
-        ,(u'Ciencia/Ecologia' , u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
-        ,(u'Comunicacion'     , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
-        ,(u'Television'       , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
+        (u'Portada'          , u'http://estaticos.elmundo.es/elmundo/rss/portada.xml' )
+        ,(u'Deportes'         , u'http://estaticos.elmundo.es/elmundodeporte/rss/portada.xml')
+        ,(u'Economia'         , u'http://estaticos.elmundo.es/elmundo/rss/economia.xml' )
+        ,(u'Espana'           , u'http://estaticos.elmundo.es/elmundo/rss/espana.xml' )
+        ,(u'Internacional'    , u'http://estaticos.elmundo.es/elmundo/rss/internacional.xml' )
+        ,(u'Cultura'          , u'http://estaticos.elmundo.es/elmundo/rss/cultura.xml' )
+        ,(u'Ciencia/Ecologia' , u'http://estaticos.elmundo.es/elmundo/rss/ciencia.xml' )
+        ,(u'Comunicacion'     , u'http://estaticos.elmundo.es/elmundo/rss/comunicacion.xml' )
+        ,(u'Television'       , u'http://estaticos.elmundo.es/elmundo/rss/television.xml' )
     ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def get_article_url(self, article):
+        return article.get('guid', None)

recipes/express_de.recipe (new file, 74 lines)

@@ -0,0 +1,74 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    title = u'Express.de'
    __author__ = 'schuster'
    oldest_article = 2
    max_articles_per_feed = 50
    no_stylesheets = True
    use_embedded_content = False
    language = 'de'
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
    '''
    remove_javascript = True
    remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
    remove_tags_after = [dict(name='div', attrs={'class':'MoreNews'})]
    remove_tags = [dict(id='kalaydo'),
        dict(id='Header'),
        dict(id='Searchline'),
        dict(id='MainNav'),
        dict(id='Logo'),
        dict(id='MainLinkSpacer'),
        dict(id='MainLinks'),
        dict(title='Diese Seite Bookmarken'),
        dict(name='span'),
        dict(name='div', attrs={'class':'spacer_leftneu'}),
        dict(name='div', attrs={'class':'button kalaydologo'}),
        dict(name='div', attrs={'class':'button stellenneu'}),
        dict(name='div', attrs={'class':'button autoneu'}),
        dict(name='div', attrs={'class':'button immobilienneu'}),
        dict(name='div', attrs={'class':'button kleinanzeigen'}),
        dict(name='div', attrs={'class':'button tiereneu'}),
        dict(name='div', attrs={'class':'button ferienwohnungen'}),
        dict(name='div', attrs={'class':'button inserierenneu'}),
        dict(name='div', attrs={'class':'spacer_rightneu'}),
        dict(name='div', attrs={'class':'spacer_rightcorner'}),
        dict(name='div', attrs={'class':'HeaderMetaNav'}),
        dict(name='div', attrs={'class':'HeaderSearchOption'}),
        dict(name='div', attrs={'class':'HeaderSearch'}),
        dict(name='div', attrs={'class':'sbutton'}),
        dict(name='div', attrs={'class':'active'}),
        ]

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
        (u'Regional - Köln', u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
        (u'Regional - Bonn', u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
        (u'Regional - Düsseldorf', u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
        (u'Regional - Region', u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
        (u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
        (u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
        (u'1.FC Köln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
        (u'Alemannia Aachen News', u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
        (u'Borussia M~Gladbach', u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
        (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
        (u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
        (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
        ]


@@ -1,51 +1,38 @@
-__license__ = 'GPL v3'
-__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
-'''
-Profile to download FAZ.net
-'''
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class FazNet(BasicNewsRecipe):
-    title = 'FAZ NET'
-    __author__ = 'Kovid Goyal, Darko Miletic'
-    oldest_article = 2
-    description = 'Frankfurter Allgemeine Zeitung'
-    publisher = 'FAZ Electronic Media GmbH'
-    category = 'news, politics, Germany'
-    use_embedded_content = False
-    language = 'de'
-    max_articles_per_feed = 30
-    no_stylesheets = True
-    encoding = 'utf-8'
-    remove_javascript = True
-
-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
-
-    remove_tags = [
-        dict(name=['object','link','embed','base'])
-        ,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
-    ]
-
-    feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
-
-    def print_version(self, url):
-        article, sep, rest = url.partition('?')
-        return article.replace('.html', '~Afor~Eprint.html')
-
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
-        soup.head.insert(0,mtag)
-        del soup.body['onload']
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+    title = u'Faz.net'
+    __author__ = 'schuster'
+    remove_tags = [dict(attrs={'class':['right', 'ArrowLinkRight', 'ModulVerlagsInfo', 'left', 'Head']}),
+                   dict(id=['BreadCrumbs', 'tstag', 'FazFooterPrint']),
+                   dict(name=['script', 'noscript', 'style'])]
+    description = 'Frankfurter Allgemeine Zeitung'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'de'
+    remove_javascript = True
+    cover_url = 'http://www.faz.net/f30/Images/Logos/logo.gif'
+
+    def print_version(self, url):
+        return url.replace('.html', '~Afor~Eprint.html')
+
+    feeds = [(u'Politik', u'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
+        (u'Wirtschaft', u'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
+        (u'Feuilleton', u'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
+        (u'Sport', u'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
+        (u'Gesellschaft', u'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
+        (u'Finanzen', u'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
+        (u'Wissen', u'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
+        (u'Reise', u'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
+        (u'Technik & Motor', u'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
+        (u'Beruf & Chance', u'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml'),
+        (u'Kunstmarkt', u'http://www.faz.net/s/RubBC09F7BF72A2405A96718ECBFB68FBFE/Tpl~Epartner~SRss_.xml'),
+        (u'Immobilien ', u'http://www.faz.net/s/RubFED172A9E10F46B3A5F01B02098C0C8D/Tpl~Epartner~SRss_.xml'),
+        (u'Rhein-Main Zeitung', u'http://www.faz.net/s/RubABE881A6669742C2A5EBCB5D50D7EBEE/Tpl~Epartner~SRss_.xml'),
+        (u'Atomdebatte ', u'http://www.faz.net/s/Rub469C43057F8C437CACC2DE9ED41B7950/Tpl~Epartner~SRss_.xml')
+    ]
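The rewritten print_version above maps an article URL onto FAZ's print view by simple suffix substitution. A worked example with a hypothetical article path, for illustration only:

url = 'http://www.faz.net/s/RubEXAMPLE/Doc~E123~ATpl~Ecommon~Scontent.html'
print url.replace('.html', '~Afor~Eprint.html')
# -> http://www.faz.net/s/RubEXAMPLE/Doc~E123~ATpl~Ecommon~Scontent~Afor~Eprint.html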


@@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.financialsense.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class FinancialSense(BasicNewsRecipe):
    title = 'Financial Sense'
    __author__ = 'Darko Miletic'
    description = 'Uncommon News & Views for the Wise Investor'
    publisher = 'Financial Sense'
    category = 'news, finances, politics, USA'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newsportal'
    masthead_url = 'http://www.financialsense.com/sites/default/files/logo.jpg'
    extra_css = """
        body{font-family: Arial,"Helvetica Neue",Helvetica,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        h2{color: gray}
        .name{margin-right: 5em}
        """

    conversion_options = {
        'comment'   : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [dict(name=['meta','link','base','object','embed','iframe'])]
    remove_tags_after = dict(attrs={'class':'vcard'})
    keep_only_tags = [dict(attrs={'class':['title','post-meta','content','item-title','vcard']})]
    remove_attributes = ['lang','type']

    feeds = [(u'Articles', u'http://feeds.feedburner.com/fso')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
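The preprocess_html above is a generic cleanup pass (the iProfesional recipe later in this commit uses the identical one): anchors wrapping plain text are replaced by that text, anchors wrapping images are demoted to div containers, and every img is guaranteed an alt attribute. A minimal runnable sketch of the first case, assuming calibre's bundled Python 2 era BeautifulSoup:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

# Hypothetical fragment, for illustration only
soup = BeautifulSoup('<p><a href="/x">Read more</a></p>')
for item in soup.findAll('a'):
    if item.string is not None:
        item.replaceWith(item.string)  # keep the text, drop the link
print soup  # -> <p>Read more</p>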

recipes/focus_de.recipe (new file, 48 lines)

@@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305567197(BasicNewsRecipe):
    title = u'Focus (DE)'
    __author__ = 'Anonymous'
    language = 'de'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    def print_version(self, url):
        return url + '?drucken=1'

    keep_only_tags = [
        dict(name='div', attrs={'id':['article']}) ]

    remove_tags = [dict(name='div', attrs={'class':'sidebar'}),
        dict(name='div', attrs={'class':'commentForm'}),
        dict(name='div', attrs={'class':'comment clearfix oid-3534591 open'}),
        dict(name='div', attrs={'class':'similarityBlock'}),
        dict(name='div', attrs={'class':'footer'}),
        dict(name='div', attrs={'class':'getMoreComments'}),
        dict(name='div', attrs={'class':'moreComments'}),
        dict(name='div', attrs={'class':'ads'}),
        dict(name='div', attrs={'class':'articleContent'}),
        ]

    remove_tags_after = [
        dict(name='div',attrs={'class':['commentForm','title', 'actions clearfix']})
        ]

    feeds = [(u'Eilmeldungen', u'http://rss2.focus.de/c/32191/f/533875/index.rss'),
        (u'Auto-News', u'http://rss2.focus.de/c/32191/f/443320/index.rss'),
        (u'Digital-News', u'http://rss2.focus.de/c/32191/f/443315/index.rss'),
        (u'Finanzen-News', u'http://rss2.focus.de/c/32191/f/443317/index.rss'),
        (u'Gesundheit-News', u'http://rss2.focus.de/c/32191/f/443314/index.rss'),
        (u'Immobilien-News', u'http://rss2.focus.de/c/32191/f/443318/index.rss'),
        (u'Kultur-News', u'http://rss2.focus.de/c/32191/f/443321/index.rss'),
        (u'Panorama-News', u'http://rss2.focus.de/c/32191/f/533877/index.rss'),
        (u'Politik-News', u'http://rss2.focus.de/c/32191/f/443313/index.rss'),
        (u'Reisen-News', u'http://rss2.focus.de/c/32191/f/443316/index.rss'),
        (u'Sport-News', u'http://rss2.focus.de/c/32191/f/443319/index.rss'),
        (u'Wissen-News', u'http://rss2.focus.de/c/32191/f/533876/index.rss'),
        ]


@@ -21,14 +21,19 @@ class Fronda(BasicNewsRecipe):
     feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')]

-    keep_only_tags = [dict(name='h1', attrs={'class':'big'}),
-        dict(name='ul', attrs={'class':'about clear'}),
-        dict(name='div', attrs={'class':'content'})]
+    keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}),
+        dict(name='div', attrs={'class':'naglowek_tresc'}),
+        dict(name='div', attrs={'id':'czytaj'}) ]
+
+    remove_tags = [dict(name='a', attrs={'class':'print'})]

     preprocess_regexps = [
         (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-        [ (r'<a href="#" class="print">Drukuj</a>', lambda match: ''),
-          (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
+        [ (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
          (r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
          (r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
-         (r'<h3>Zobacz t.*?</div>', lambda match: '') ]
+         (r'<h3>Zobacz t.*?</div>', lambda match: ''),
+         (r'<p[^>]*>&nbsp;</p>', lambda match: ''),
+         (r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
+         (r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
     ]

recipes/glamour.recipe (new file, 38 lines)

@@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    title = u'Glamour (US)'
    oldest_article = 21
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en'
    remove_javascript = True
    __author__ = 'Anonymous'
    remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})]

    def print_version(self, url):
        return url + '?printable=true'

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'),
        (u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'),
        (u'All Sex, Love & Life', u'http://feeds.glamour.com/glamour/sex_love_life'),
        (u'All Health & Fitness', u'http://feeds.glamour.com/glamour/health_fitness'),
        (u'Shopping', u'http://feeds.glamour.com/glamour/shopping'),
        (u'Slaves to Fashion blog', u'http://feeds.glamour.com/glamour/slavestofashion'),
        (u'The Girls in the Beauty Department', u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
        (u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'),
        (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
        (u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'),
        (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
        (u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'),
        (u'Margarita Shapes Up blog', u'http://feeds.glamour.com/glamour/margaritashapesup'),
        (u'Little Miss Fortune blog', u'http://feeds.glamour.com/glamour/little-miss-fortune'),
        ]


@@ -6,13 +6,13 @@ __copyright__ = 'Copyright 2010 Starson17'
 www.gocomics.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
-import mechanize
+import mechanize, re

 class GoComics(BasicNewsRecipe):
     title = 'GoComics'
     __author__ = 'Starson17'
-    __version__ = '1.03'
-    __date__ = '09 October 2010'
+    __version__ = '1.05'
+    __date__ = '19 may 2011'
     description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
     category = 'news, comics'
     language = 'en'
@@ -20,6 +20,7 @@ class GoComics(BasicNewsRecipe):
     no_stylesheets = True
     remove_javascript = True
     cover_url = 'http://paulbuckley14059.files.wordpress.com/2008/06/calvin-and-hobbes.jpg'
+    remove_attributes = ['style']

     ####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
     # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
@@ -40,6 +41,8 @@ class GoComics(BasicNewsRecipe):
     remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
                    dict(name='div', attrs={'class':['tag-wrapper']}),
+                   dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
+                   dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
                    dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
                    ]


@@ -1,83 +1,70 @@
-#!/usr/bin/env python
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class golem_ger(BasicNewsRecipe):
-    title = u'Golem.de'
-    language = 'de'
-    __author__ = 'Kovid Goyal'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    language = 'de'
-    lang = 'de-DE'
-    no_stylesheets = True
-    encoding = 'iso-8859-1'
-    recursions = 1
-    match_regexps = [r'http://www.golem.de/.*.html']
-    keep_only_tags = [
-        dict(name='h1', attrs={'class':'artikelhead'}),
-        dict(name='p', attrs={'class':'teaser'}),
-        dict(name='div', attrs={'class':'artikeltext'}),
-        dict(name='h2', attrs={'id':'artikelhead'}),
-    ]
-    remove_tags = [
-        dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
-        dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
-        dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
-        dict(name='td', attrs={'class':['xsmall']}),
-    ]
-#    remove_tags_after = [
-#        dict(name='div', attrs={'id':['contentad2']})
-#    ]
-    feeds = [
-        (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
-        (u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
-        (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
-        (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
-        (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
-        (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
-        (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
-        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
-        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
-        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
-        (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
-        (u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
-        (u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
-        (u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
-        (u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
-        (u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
-        (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
-        (u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0')
-    ]
-    feeds = [
-        (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
-        (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=feed=RSS2.0'),
-        (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
-        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
-        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
-        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
-    ]
-    extra_css = '''
-        h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
-        h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
-        h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
-        h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
-        h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
-        .teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
-        .xsmall{font-style:italic;font-size:x-small;}
-        .td{font-style:italic;font-size:x-small;}
-        img {align:left;}
-    '''
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AdvancedUserRecipe1303841067(BasicNewsRecipe):
+    title = u'Golem.de'
+    __author__ = 'schuster'
+    oldest_article = 7
+    max_articles_per_feed = 10
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'de'
+    cover_url = 'http://www.e-energy.de/images/logo_golem.jpg'
+    masthead_url = 'http://www.golem.de/staticrl/images/logo.png'
+    extra_css = '''
+        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
+        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
+    '''
+    remove_javascript = True
+    remove_tags_befor = [dict(name='header', attrs={'class':'cluster-header'})]
+    remove_tags_after = [dict(name='p', attrs={'class':'meta'})]
+    remove_tags = [dict(rel='nofollow'),
+        dict(name='header', attrs={'id':'header'}),
+        dict(name='div', attrs={'class':'dh1'}),
+        dict(name='label', attrs={'class':'implied'}),
+        dict(name='section', attrs={'id':'comments'}),
+        dict(name='li', attrs={'class':'gg_prebackcounterItem'}),
+        dict(name='li', attrs={'class':'gg_prebackcounterItem gg_embeddedIndexCounter'}),
+        dict(name='img', attrs={'class':'gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer'}),
+        dict(name='div', attrs={'target':'_blank'})
+    ]
+
+    def get_browser(self, *args, **kwargs):
+        from calibre import browser
+        kwargs['user_agent'] = 'mozilla'
+        return browser(*args, **kwargs)
+
+    def get_article_url(self, article):
+        return article.get('id', article.get('guid', None))
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
+
+    feeds = [(u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
+        (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
+        (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
+        (u'Handy', u'http://rss.golem.de/rss.php?tp=handy&feed=RSS2.0'),
+        (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS2.0'),
+        (u'Mobile', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
+        (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
+        (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
+        (u'Security', u'http://rss.golem.de/rss.php?tp=sec&feed=RSS2.0'),
+        (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
+        (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
+        (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
+        (u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
+        (u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
+        (u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
+        (u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
+        (u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
+        (u'Wirtschaft', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0'),
+        (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0')
+    ]


@@ -0,0 +1,31 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    title = u'Good House Keeping'
    language = 'en'
    __author__ = 'Anonymous'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    def print_version(self, url):
        segments = url.split('/')
        printURL = '/'.join(segments[0:3]) + '/print-this/' + '/'.join(segments[4:])
        return printURL

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'Recipes & Entertaining', u'http://www.goodhousekeeping.com/food/food-rss/?src=rss'),
        (u'Home & House', u'http://www.goodhousekeeping.com/home/home-rss/?src=rss'),
        (u'Diet & Health', u'http://www.goodhousekeeping.com/health/health-rss/?src=rss'),
        (u'Beauty & Style', u'http://www.goodhousekeeping.com/beauty/beauty-rss/?src=rss'),
        (u'Family & Pets', u'http://www.goodhousekeeping.com/family/family-rss/?src=rss'),
        (u'Saving Money', u'http://www.goodhousekeeping.com/money/money-rss/?src=rss'),
        ]
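The print_version above rebuilds the URL around Good Housekeeping's print-this path: segments 0-2 keep the scheme and host, segment 3 (the section) is dropped, and the rest is reattached. A worked example with a hypothetical URL:

url = 'http://www.goodhousekeeping.com/food/recipes/apple-pie'
segments = url.split('/')
print '/'.join(segments[0:3]) + '/print-this/' + '/'.join(segments[4:])
# -> http://www.goodhousekeeping.com/print-this/recipes/apple-pie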


@@ -0,0 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    title = u'Good to Know (uk)'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    __author__ = 'Anonymous'
    language = 'en_GB'
    remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})]

    def print_version(self, url):
        return url + '/print/1'

    def preprocess_html(self, soup):
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    feeds = [(u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'),
        (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'),
        (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'),
        (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'),
        (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'),
        (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'),
        (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'),
        (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'),
        ]

recipes/grrm.recipe (new file, 36 lines)

@@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
grrm.livejournal.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class NotABlog(BasicNewsRecipe):
    title = 'Not A Blog - George R.R. Martin'
    __author__ = 'Darko Miletic'
    description = 'George R.R. Martin'
    oldest_article = 15
    max_articles_per_feed = 100
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    use_embedded_content = True
    publication_type = 'blog'

    conversion_options = {
        'comment'   : description
        , 'tags'      : 'sf, fantasy, game of thrones'
        , 'publisher' : 'George R.R. Martin'
        , 'language'  : language
    }

    feeds = [(u'Posts', u'http://grrm.livejournal.com/data/rss')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)
Binary files (recipe icons, not shown): new icons added at recipes/icons/divahair.png, recipes/icons/marca.png, recipes/icons/mayra.png, recipes/icons/natgeo.png, recipes/icons/osnews_pl.png, recipes/icons/wash_post.png and recipes/icons/ziuaveche.png, along with several other recipe icons added or updated in this commit.

recipes/impulse_de.recipe (new file, 32 lines)

@@ -0,0 +1,32 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1305470859(BasicNewsRecipe):
    title = u'Impulse.de'
    language = 'de'
    __author__ = 'schuster'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg'
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {min-width:300px; max-width:600px; min-height:300px; max-height:800px}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        return url.replace('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print')

    remove_tags_bevor = [dict(name='h1', attrs={'class':'h2'})]
    remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})]

    feeds = [(u'impulstest', u'http://www.impulse.de/rss/')]

    remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'kasten_artikel']}),
        dict(id=['metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia', 'rating_open']),
        dict(span=['ratingtext', 'Gesamtranking', 'h3','']),
        dict(rel=['canonical'])]


@ -0,0 +1,79 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.iprofesional.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class iProfesional(BasicNewsRecipe):
title = 'iProfesional.com'
__author__ = 'Darko Miletic'
description = 'Las ultimas noticias sobre profesionales'
publisher = 'Emprendimientos Corporativos S.A.'
category = 'news, IT, impuestos, negocios, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'es_AR'
remove_empty_feeds = True
publication_type = 'nesportal'
masthead_url = 'http://www.iprofesional.com/img/logo-iprofesional.png'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
.titulo-interior{font-family: Georgia,"Times New Roman",Times,serif}
.autor-nota{font-size: small; font-weight: bold; font-style: italic; color: gray}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(attrs={'class':['fecha','interior-nota']})]
remove_tags = [
dict(name=['meta','link','base','embed','object','iframe'])
,dict(attrs={'class':['menu-imprimir','guardarNota','IN-widget','fin','permalink']})
]
remove_attributes=['lang','xmlns:og','xmlns:fb']
feeds = [
(u'Ultimas noticias' , u'http://feeds.feedburner.com/iprofesional-principales-noticias')
,(u'Finanzas' , u'http://feeds.feedburner.com/iprofesional-finanzas' )
,(u'Impuestos' , u'http://feeds.feedburner.com/iprofesional-impuestos' )
,(u'Negocios' , u'http://feeds.feedburner.com/iprofesional-economia' )
,(u'Comercio Exterior' , u'http://feeds.feedburner.com/iprofesional-comercio-exterior' )
,(u'Tecnologia' , u'http://feeds.feedburner.com/iprofesional-tecnologia' )
,(u'Management' , u'http://feeds.feedburner.com/iprofesional-managment' )
,(u'Marketing' , u'http://feeds.feedburner.com/iprofesional-marketing' )
,(u'Legales' , u'http://feeds.feedburner.com/iprofesional-legales' )
,(u'Autos' , u'http://feeds.feedburner.com/iprofesional-autos' )
,(u'Vinos' , u'http://feeds.feedburner.com/iprofesional-vinos-bodegas' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
limg = item.find('img')
if item.string is not None:
str = item.string
item.replaceWith(str)
else:
if limg:
item.name = 'div'
item.attrs = []
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
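
The preprocess_html above flattens hyperlinks so that e-ink output is not littered with underlined link text: anchors wrapping plain text are replaced by the text itself, while anchors wrapping an image are demoted to a plain div so the image survives. A self-contained sketch of the same idea, assuming the standalone bs4 package rather than calibre's bundled BeautifulSoup:

from bs4 import BeautifulSoup  # pip install beautifulsoup4

def flatten_links(html):
    soup = BeautifulSoup(html, 'html.parser')
    for a in soup.find_all('a'):
        if a.find('img') is not None:
            # Keep the wrapped image but drop the link semantics.
            a.name = 'div'
            a.attrs = {}
        else:
            # Replace the anchor with its visible text.
            a.replace_with(a.get_text())
    return str(soup)

print(flatten_links('<p>See <a href="http://example.com">this page</a>.</p>'))
# -> <p>See this page.</p>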

View File

@ -3,8 +3,9 @@ from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1295262156(BasicNewsRecipe):
    title = u'kath.net'
    __author__ = 'Bobus'
    description = u'Katholische Nachrichten'
    oldest_article = 7
    language = 'de'
    max_articles_per_feed = 100

    feeds = [(u'kath.net', u'http://www.kath.net/2005/xml/index.xml')]

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
title = u'自由電子報'
oldest_article = 1
max_articles_per_feed = 100
feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),
(u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),
(u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),
(u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),
(u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),
(u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),
(u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),
(u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),
(u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),
(u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),
(u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),
(u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),
(u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),
(u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),
(u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),
]
extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
__author__ = 'einstuerzende, updated by Eddie Lau'
__version__ = '1.1'
language = 'zh'
publisher = 'Liberty Times Group'
description = 'Liberty Times (Taiwan)'
category = 'News, Chinese, Taiwan'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'big5'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})]

View File

@ -1,14 +1,11 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.marca.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Marca(BasicNewsRecipe):
    title = 'Marca'
@ -22,35 +19,30 @@ class Marca(BasicNewsRecipe):
    use_embedded_content = False
    delay = 1
    encoding = 'iso-8859-15'
    language = 'es_ES'
    publication_type = 'newsportal'
    masthead_url = 'http://estaticos.marca.com/deporte/img/v3.0/img_marca-com.png'
    extra_css = """
        body{font-family: Tahoma,Geneva,sans-serif}
        h1,h2,h3,h4,h5,h6{font-family: 'LatoBlack',Tahoma,Geneva,sans-serif}
        .cab_articulo h4 {font-family: Georgia,"Times New Roman",Times,serif}
        .antetitulo{text-transform: uppercase}
    """

    feeds = [(u'Portada', u'http://estaticos.marca.com/rss/portada.xml')]

    keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','cuerpo_articulo']})]
    remove_attributes = ['lang']
    remove_tags = [
                   dict(name=['object','link','script','embed','iframe','meta','base'])
                  ,dict(name='div', attrs={'class':'tabs'})
                  ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_article_url(self, article):
        return article.get('guid', None)

22
recipes/max_planck.recipe Normal file
View File

@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Max-Planck-Inst.'
__author__ = 'schuster'
remove_tags = [dict(attrs={'class':['clearfix', 'lens', 'col2_box_list', 'col2_box_teaser group_ext no_print', 'dotted_line', 'col2_box_teaser', 'box_image small', 'bold', 'col2_box_teaser no_print', 'print_kontakt']}),
dict(id=['ie_clearing', 'col2', 'col2_content']),
dict(name=['script', 'noscript', 'style'])]
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
def print_version(self, url):
split_url = url.split("/")
print_url = 'http://www.mpg.de/print/' + split_url[3]
return print_url
feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]

51
recipes/mayra.recipe Normal file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mayra.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Mayra(BasicNewsRecipe):
title = u'Mayra'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'Traieste urban, cool, sexy'
publisher = 'Mayra'
category = 'Ziare,Stiri,Reviste'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://img.konkurs.ro/img/concursuri-cu-premii/147/14672_front.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'article_details'})
]
remove_tags = [
dict(name='div', attrs={'id':'LikePluginPagelet'})
, dict(name='p', attrs={'id':'tags'})
, dict(name='span', attrs={'id':'tweet-button'})
]
remove_tags_after = [
dict(name='div', attrs={'id':'LikePluginPagelet'})
]
feeds = [ (u'\u0218tiri', u'http://www.mayra.ro/rss') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,10 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305636254(BasicNewsRecipe):
    title = u"Men's Health (US)"
language = 'en'
__author__ = 'Anonymous'
oldest_article = 14
max_articles_per_feed = 100
feeds = [(u'News', u'http://blogs.menshealth.com/health-headlines/feed')]

View File

@ -1,15 +1,18 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2010-2011, Eddie Lau'

# Users of Kindle 3 with limited system-level CJK support
# please replace the following "True" with "False".
__MakePeriodical__ = True
# Turn below to true if your device supports display of CJK titles
__UseChineseTitle__ = False
# Turn below to true if you wish to use life.mingpao.com as the main article source
__UseLife__ = True

'''
Change Log:
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
2011/03/06: add new articles for finance section, also a new section "Columns"
2011/02/28: rearrange the sections
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
@ -32,41 +35,43 @@ import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation

class MPHKRecipe(BasicNewsRecipe):
    title = 'Ming Pao - Hong Kong'
    oldest_article = 1
    max_articles_per_feed = 100
    __author__ = 'Eddie Lau'
    description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
    publisher = 'MingPao'
    category = 'Chinese, News, Hong Kong'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'zh'
    encoding = 'Big5-HKSCS'
    recursions = 0
    conversion_options = {'linearize_tables':True}
    timefmt = ''
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
    masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
    keep_only_tags = [dict(name='h1'),
                      dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                      dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                      dict(attrs={'id':['newscontent']}), # entertainment and column page content
                      dict(attrs={'id':['newscontent01','newscontent02']}),
                      dict(attrs={'class':['photo']}),
                      dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                     ]
    remove_tags = [dict(name='style'),
                   dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
                   dict(name='table')] # for content fetched from life.mingpao.com
    remove_attributes = ['width']
    preprocess_regexps = [
                          (re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
                           lambda match: '<h1>'),
                          (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
@ -80,10 +85,10 @@ class MPHKRecipe(BasicNewsRecipe):
                           lambda match: "</b>")
                         ]

    def image_url_processor(cls, baseurl, url):
        # trick: break the url at the first occurance of digit, add an additional
        # '_' at the front
        # not working, may need to move this to preprocess_html() method
        # minIdx = 10000
        # i0 = url.find('0')
        # if i0 >= 0 and i0 < minIdx:
@ -115,314 +120,357 @@ class MPHKRecipe(BasicNewsRecipe):
        # i9 = url.find('9')
        # if i9 >= 0 and i9 < minIdx:
        #     minIdx = i9
        return url

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        # dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        # dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except:
            cover = None
        return cover

    def parse_index(self):
        feeds = []
        dateStr = self.get_fetchdate()

        if __UseLife__:
            for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
                                       (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
                                       (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
                                       (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalmr', 'nal'),
                                       (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalfa', 'nal'),
                                       (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalca', 'nal'),
                                       (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalta', 'nal'),
                                       (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                                       (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                                       (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
                                       (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=ncolumn', 'ncl')]:
                articles = self.parse_section2(url, keystr)
                if articles:
                    feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))
        else:
            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - editorial
            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalmr')
            if ed_articles:
                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                               (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - finance
            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            if fin_articles:
                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - entertainment
            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            if ent_articles:
                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - columns
            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=ncolumn')
            if col_articles:
                feeds.append((u'\u5c08\u6b04 Columns', col_articles))

        return feeds

    # parse from news.mingpao.com
    def parse_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
        current_articles = []
        included_urls = []
        divs.reverse()
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' + url
            if url not in included_urls and url.rfind('Redirect') == -1:
                current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_ed_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_fin_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href= True)
        current_articles = []
        included_urls = []
        for i in a:
            #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
            if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
                title = self.tag_to_string(i)
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles

    def parse_ent_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_col_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(style=True):
            del item['width']
        for item in soup.findAll(stype=True):
            del item['absmiddle']
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if __MakePeriodical__ == False:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:' + self.publication_type + ':' + self.short_title()
            else:
                mi.publication_type = self.publication_type + ':' + self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
        opf_path = os.path.join(dir, 'index.opf')
        ncx_path = os.path.join(dir, 'index.ncx')
        opf = OPFCreator(dir, mi)
        # Add mastheadImage entry to <guide> section
        mp = getattr(self, 'masthead_path', None)
        if mp is not None and os.access(mp, os.R_OK):
            from calibre.ebooks.metadata.opf2 import Guide
            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
            ref.type = 'masthead'
            ref.title = 'Masthead Image'
            opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                    play_order=po, author=auth, description=desc)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp
                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                            not self.has_single_feed,
                                            a.orig_url, self.publisher, prefix=prefix,
                                            center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                        with open(last, 'wb') as fi:
                            fi.write(unicode(soup).encode('utf-8'))

        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')

        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                    f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)

        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
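
A side note on the URL filters in the parse_* methods above: an expression such as (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1) is just a roundabout substring-containment test, since rfind() returns -1 only when the substring is absent. A sketch of the equivalent, more direct predicate (the helper name is invented for illustration):

def is_wanted_article(url, keystr):
    # Equivalent to the recipe's rfind() checks: keep only links whose
    # URL contains both '.txt' and the section key (e.g. 'nal' or 'ncl').
    return '.txt' in url and keystr in url

# Illustrative URLs, not taken from the live site:
assert is_wanted_article('http://life.mingpao.com/cfm/nalga1.txt', 'nal')
assert not is_wanted_article('http://life.mingpao.com/cfm/index.cfm', 'nal')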

50
recipes/moldovaazi.recipe Normal file
View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
azi.md
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MoldovaAzi(BasicNewsRecipe):
title = u'Moldova Azi'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'Moldova pe internet'
publisher = 'Moldova Azi'
category = 'Ziare,Stiri,Moldova'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://www.azi.md/images/logo.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'id':'in'})
]
remove_tags = [
dict(name='div', attrs={'class':'in-more-stories'})
]
remove_tags_after = [
dict(name='div', attrs={'id':'comment_wrapper'})
, dict(name='div', attrs={'class':'box-title4'})
]
feeds = [ (u'\u0218tiri', u'http://www.azi.md/ro/feeds/0/rss201') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

71
recipes/natgeo.recipe Normal file
View File

@ -0,0 +1,71 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011, gagsays <gagsays at gmail dot com>'
'''
nationalgeographic.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NatGeo(BasicNewsRecipe):
title = u'National Geographic'
language = 'en'
oldest_article = 20
max_articles_per_feed = 25
encoding = 'utf8'
publisher = 'nationalgeographic.com'
category = 'science, nat geo'
__author__ = 'gagsays'
masthead_url = 'http://s.ngeo.com/wpf/sites/themes/global/i/presentation/ng_logo_small.png'
description = 'Inspiring people to care about the planet since 1888'
timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True
use_embedded_content = False
extra_css = '''
body {color: #000000;font-size: medium;}
h1 {color: #222222; font-size: large; font-weight:lighter; text-decoration:none; text-align: center;font-family:Georgia,Times New Roman,Times,serif;}
h2 {color: #454545; font-size: small; font-weight:lighter; text-decoration:none; text-align: justify; font-style:italic;font-family :Georgia,Times New Roman,Times,serif;}
h3 {color: #555555; font-size: small; font-style:italic; margin-top: 10px;}
img{margin-bottom: 0.25em;display:block;margin-left: auto;margin-right: auto;}
a:link,a,.a,href {text-decoration: none;color: #000000;}
.caption{color: #000000;font-size: xx-small;text-align: justify;font-weight:normal;}
.credit{color: #555555;font-size: xx-small;text-align: left;font-weight:lighter;}
p.author,p.publication{color: #000000;font-size: xx-small;text-align: left;display:inline;}
p.publication_time{color: #000000;font-size: xx-small;text-align: right;text-decoration: underline;}
p {margin-bottom: 0;}
p + p {text-indent: 1.5em;margin-top: 0;}
.hidden{display:none;}
#page_head{text-transform:uppercase;}
'''
def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds:
for article in feed.articles[:]:
if 'Presented' in article.title or 'Pictures' in article.title:
feed.articles.remove(article)
return feeds
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
remove_tags_before = dict(id='page_head')
keep_only_tags = [
dict(name='div',attrs={'id':['page_head','content_mainA']})
]
remove_tags_after = [
dict(name='div',attrs={'class':['article_text','promo_collection']})
]
remove_tags = [
dict(name='div', attrs={'class':['aside','primary full_width']})
,dict(name='div', attrs={'id':['header_search','navigation_mainB_wrap']})
]
feeds = [
(u'Daily News', u'http://feeds.nationalgeographic.com/ng/News/News_Main')
]
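
The parse_feeds override above post-filters the already-parsed feeds, discarding photo galleries and sponsored entries by title keyword. The same pruning pattern in isolation (plain lists of titles stand in for calibre's Feed objects, which expose a mutable articles list):

def prune_articles(titles, banned=('Presented', 'Pictures')):
    # Drop any article whose title contains one of the banned words.
    return [t for t in titles if not any(w in t for w in banned)]

print(prune_articles(['Volcano Pictures: Lava Flows', 'New Species Found']))
# -> ['New Species Found']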

View File

@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
title = u'National Geographic (DE)'
__author__ = 'Anonymous'
language = 'de'
oldest_article = 7
max_articles_per_feed = 1000
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
cover_url = 'http://www.nationalgeographic.de/images/national-geographic-logo.jpg'
keep_only_tags = [
dict(name='div', attrs={'class':['contentbox_no_top_border']}) ]
remove_tags = [
dict(name='div', attrs={'class':'related'}),
dict(name='li', attrs={'class':'first'}),
dict(name='div', attrs={'class':'extrasbox_inner'}),
]
feeds = [ (u'National Geographic', u'http://feeds.nationalgeographic.de/ng-neueste-artikel'),
]

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
newsmoldova.md
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NewsMoldova(BasicNewsRecipe):
title = u'Agen\u0163ia de \u015ftiri Moldova'
language = 'ro'
__author__ = u'Silviu Cotoar\u0103'
description = u'Agen\u0163ia de \u015ftiri Moldova'
publisher = 'Moldova'
category = 'Ziare,Stiri,Moldova'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
cover_url = 'http://www.newsmoldova.md/i/logo_top_md.gif'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [ dict(name='div', attrs={'class':'main-article-index article'})
]
remove_tags = [
dict(name='div', attrs={'id':'actions'})
, dict(name='li', attrs={'class':'invisible'})
]
remove_tags_after = [
dict(name='div', attrs={'id':'actions'})
]
feeds = [ (u'\u0218tiri', u'http://newsmoldova.md/export/rss2/archive/index.xml') ]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe):
    BASE_URL = 'http://www.newsweek.com'

    topics = {
        'Culture' : '/tag/culture.html',
        'Business' : '/tag/business.html',
        'Society' : '/tag/society.html',
        'Science' : '/tag/science.html',
        'Education' : '/tag/education.html',
        'Politics' : '/tag/politics.html',
        'Health' : '/tag/health.html',
        'World' : '/tag/world.html',
        'Nation' : '/tag/nation.html',
        'Technology' : '/tag/technology.html',
        'Game Changers' : '/tag/game-changers.html',
    }

    keep_only_tags = dict(name='article', attrs={'class':'article-text'})
    remove_tags = [dict(attrs={'data-dartad':True})]
    remove_attributes = ['property']
@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe):
        return soup

    def newsweek_sections(self):
        for topic_name, topic_url in self.topics.iteritems():
            yield (topic_name,
                   self.BASE_URL + topic_url)

    def newsweek_parse_section_page(self, soup):
        for article in soup.findAll('article', about=True,

29
recipes/ngz.recipe Normal file
View File

@ -0,0 +1,29 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'NGZ-online'
__author__ = 'schuster'
remove_tags_before = dict(id='bu')
remove_tags_after = dict(id='noblock')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix', 'liketext']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index', 'Verlinken', 'vorheriger', 'LESERKOMMENTARE', 'bei facebook', 'bei twitter', 'Schreiben Sie jetzt Ihre Meinung:', 'Thema', 'Ihr Beitrag', 'Ihr Name', 'Ich möchte über weitere Lesermeinungen zu diesem Artikel per E-Mail informiert werden.', 'banneroben', 'bannerrechts', 'inserieren', 'stellen', 'auto', 'immobilien', 'kleinanzeige', 'tiere', 'ferienwohnung', 'NGZ Card', 'Mediengruppe RP', 'Werben', 'Newsletter', 'Wetter', 'RSS', 'Abo', 'Anzeigen', 'Redaktion', 'Schulprojekte', 'Gast', 'Mein NGZ', 'Nachrichten', 'Sport', 'Wirtschaft', 'Stadt-Infos', 'Bilderserien', 'Bookmarken', 'del.icio.us', 'Mister Wong', 'YiGG', 'Webnews', 'Shortnews', 'Twitter', 'Newsider', 'Facebook', 'StudiVZ/MeinVZ', 'Versenden', 'Drucken']),
dict(name=['script', 'noscript', 'style'])]
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
cover_url = 'http://www.rhein-kreis-neuss-macht-sport.de/sport/includes/bilder/ngz_logo.jpg'
def print_version(self, url):
return url + '?ot=de.circit.rpo.PopupPageLayout.ot'
feeds = [
(u'Grevenbroich', u'http://www.ngz-online.de/app/feed/rss/grevenbroich'),
(u'Kreis Neuss', u'http://www.ngz-online.de/app/feed/rss/rheinkreisneuss'),
(u'Dormagen', u'http://www.ngz-online.de/app/feed/rss/dormagen'),
(u'J\xfcchen', u'http://www.ngz-online.de/app/feed/rss/juechen'),
(u'Rommerskirchen', u'http://www.ngz-online.de/app/feed/rss/rommerskirchen')
]

22
recipes/pro_physik.recipe Normal file
View File

@ -0,0 +1,22 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Pro Physik'
__author__ = 'schuster'
oldest_article = 4
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
cover_url = 'http://www.pro-physik.de/Phy/images/site/prophysik_logo1.jpg'
def print_version(self, url):
return url.replace('leadArticle.do', 'print.do')
feeds = [(u'Hightech', u'http://www.pro-physik.de/Phy/hightechfeed.xml'),
(u'Forschung', u'http://www.pro-physik.de/Phy/forschungfeed.xml'),
(u'Magazin', u'http://www.pro-physik.de/Phy/magazinfeed.xml')]

View File

@ -3,7 +3,6 @@ __license__ = 'GPL v3'
'''
'''
from calibre.web.feeds.recipes import BasicNewsRecipe


class ReadersDigest(BasicNewsRecipe):
@ -38,151 +37,20 @@ class ReadersDigest(BasicNewsRecipe):
    '''

    feeds = [
        ('Food', 'http://www.rd.com/food/feed'),
        ('Health', 'http://www.rd.com/health/feed'),
        ('Home', 'http://www.rd.com/home/feed'),
        ('Family', 'http://www.rd.com/family/feed'),
        ('Money', 'http://www.rd.com/money/feed'),
        ('Travel', 'http://www.rd.com/travel/feed'),
    ]

    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'

    keep_only_tags = dict(id='main-content')
    remove_tags = [
        {'class':['post-categories']},
        # useless recipes
    ]

View File

@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
replicavedetelor.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ReplicaVedetelor(BasicNewsRecipe):
title = u'Replica Vedetelor'
__author__ = u'Silviu Cotoara'
description = u'Ofer\u0103 vedetelor dreptul la replic\u0103'
publisher = 'Replica Vedetelor'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Reviste,Vedete'
encoding = 'utf-8'
cover_url = 'http://www.webart-software.eu/_pics/lucrari_referinta/medium/84/1-Replica-Vedetelor.jpg'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'zona-continut'})
]
remove_tags = [
dict(name='ul', attrs={'id':['lista-imagini']})
, dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
]
remove_tags_after = [
dict(name='form', attrs={'id':['f-trimite-unui-prieten']})
]
feeds = [
(u'Feeds', u'http://www.replicavedetelor.ro/feed')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class RzeczpospolitaRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = u'kwetal and Tomasz Dlugosz'
    language = 'pl'
    version = 1
@ -38,6 +38,8 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))

    extra_css = '''
@ -48,6 +50,13 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
        .fot{font-size: x-small; color: #666666;}
    '''

    def skip_ad_pages(self, soup):
        if ('advertisement' in soup.find('title').string.lower()):
            href = soup.find('a').get('href')
            return self.index_to_soup(href, raw=True)
        else:
            return None

    def print_version(self, url):
        start, sep, rest = url.rpartition('/')
        forget, sep, index = rest.rpartition(',')
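
For context, skip_ad_pages (added above) is a BasicNewsRecipe hook that runs on each downloaded page before cleanup: returning None keeps the page, while returning raw HTML replaces it, which lets a recipe step over interstitial ad pages. A sketch of the hook's contract, with an illustrative recipe class and a None-safe title check added as a defensive variation:

from calibre.web.feeds.news import BasicNewsRecipe

class SkipAdExample(BasicNewsRecipe):
    title = 'Skip Ad Example'
    feeds = [('News', 'http://example.com/rss')]  # hypothetical feed

    def skip_ad_pages(self, soup):
        # If the fetched page looks like an ad interstitial, follow its
        # first link and hand calibre that page's raw HTML instead.
        title = soup.find('title')
        if title is not None and 'advertisement' in (title.string or '').lower():
            href = soup.find('a').get('href')
            return self.index_to_soup(href, raw=True)
        return None  # not an ad page; keep the original content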

28
recipes/spektrum.recipe Normal file
View File

@ -0,0 +1,28 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Spektrum (der Wissenschaft)'
__author__ = 'schuster'
oldest_article = 7
max_articles_per_feed = 100
language = 'de'
cover_url = 'http://upload.wikimedia.org/wikipedia/de/3/3b/Spektrum_der_Wissenschaft_Logo.svg'
remove_tags = [dict(attrs={'class':['hauptnaviPkt gainlayout', 'hauptnaviButton', 'suchButton', 'suchbegriffKasten', 'loginButton', 'subnavigation', 'artikelInfoLeiste gainlayout', 'artikelTools', 'nurLetzteSeite', 'link', 'boxUnterArtikel', 'leserbriefeBlock', 'boxTitel', 'boxInhalt', 'sehrklein', 'boxabstand', 'werbeboxinhalt', 'rbabstand', 'bildlinks', 'rechtebox', 'denkmalbox', 'denkmalfrage']}),
dict(id=['pflip', 'verlagsleiste', 'bereich', 'bannerVertikal', 'headerLogoLink', 'kopf', 'topNavi', 'headerSchnellsuche', 'headerSchnellsucheWarten', 'navigation', 'navigationL', 'navigationR', 'inhalt', 'rechtespalte', 'sdwboxenshop', 'shopboxen', 'fuss']),
dict(name=['naservice'])]
def print_version(self,url):
newurl = url.replace('artikel/', 'sixcms/detail.php?id=')
return newurl + '&_druckversion=1'
feeds = [(u'Spektrum der Wissenschaft', u'http://www.spektrum.de/artikel/982623'),
(u'SpektrumDirekt', u'http://www.spektrumdirekt.de/artikel/996406'),
(u'Sterne und Weltraum', u'http://www.astronomie-heute.de/artikel/865248'),
(u'Gehirn & Geist', u'http://www.gehirn-und-geist.de/artikel/982626'),
(u'epoc', u'http://www.epoc.de/artikel/982625')
]
filter_regexps = [r'ads\.doubleclick\.net']

View File

@ -33,7 +33,7 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
            elif c.name.endswith('_password'):
                br[c.name] = self.password
        raw = br.submit().read()
        if 'You have been logged in' not in raw:
            raise ValueError('Failed to login, check your username and password')
        return br

View File

@ -0,0 +1,24 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Technology Review'
__author__ = 'schuster'
remove_tags_before = dict(id='keywords')
remove_tags_after = dict(id='kommentar')
remove_tags = [dict(attrs={'class':['navi_oben_pvg', 'navi_oben_tarifr', 'navi_oben_itm', 'navi_oben_eve', 'navi_oben_whi', 'navi_oben_abo', 'navi_oben_shop', 'navi_top_logo', 'navi_top_abschnitt', 'first']}),
dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
dict(name=['script', 'noscript', 'style'])]
oldest_article = 4
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
def print_version(self, url):
return url + '?view=print'
feeds = [
(u'Technik News', u'http://www.heise.de/tr/news-atom.xml') ]

View File

@ -1,17 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class TelepolisNews(BasicNewsRecipe): class TelepolisNews(BasicNewsRecipe):
title = u'Telepolis (News+Artikel)' title = u'Telepolis (News+Artikel)'
__author__ = 'Gerhard Aigner' __author__ = 'syntaxis'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG' publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
description = 'News from telepolis' description = 'News from Telepolis'
category = 'news' category = 'news'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
@ -20,14 +15,19 @@ class TelepolisNews(BasicNewsRecipe):
encoding = "utf-8" encoding = "utf-8"
language = 'de' language = 'de'
use_embedded_content =False
remove_empty_feeds = True remove_empty_feeds = True
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})] keep_only_tags = [dict(name = 'div',attrs={'class':'head'}),dict(name = 'div',attrs={'class':'leftbox'}),dict(name='td',attrs={'class':'strict'})]
remove_tags = [ dict(name='td',attrs={'class':'blogbottom'}),
dict(name='div',attrs={'class':'forum'}), dict(name='div',attrs={'class':'social'}),dict(name='div',attrs={'class':'blog-letter p-news'}),
dict(name='div',attrs={'class':'blog-sub'}),dict(name='div',attrs={'class':'version-div'}),dict(name='div',attrs={'id':'breadcrumb'})
,dict(attrs={'class':'tp-url'}),dict(attrs={'class':'blog-name entry_'}) ]
remove_tags_after = [dict(name='span', attrs={'class':['breadcrumb']})]
feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')] feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]
@ -39,15 +39,8 @@ class TelepolisNews(BasicNewsRecipe):
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
def get_article_url(self, article):
'''if the linked article is of kind artikel don't take it'''
if (article.link.count('artikel') > 1) :
return None
return article.link
def preprocess_html(self, soup): def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">' mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
return soup return soup

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008 - 2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008 - 2011, Darko Miletic <darko.miletic at gmail.com>'
''' '''
thenation.com thenation.com
''' '''
@ -16,10 +16,17 @@ class Thenation(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
language = 'en' language = 'en'
use_embedded_content = False use_embedded_content = False
delay = 1 delay = 1
masthead_url = 'http://www.thenation.com/sites/default/themes/thenation/images/logo-main.gif' masthead_url = 'http://www.thenation.com/sites/default/themes/thenation/images/logo-main.gif'
extra_css = ' body{font-family: Arial,Helvetica,sans-serif;} .print-created{font-size: small;} .caption{display: block; font-size: x-small;} ' login_url = 'http://www.thenation.com/user?destination=%3Cfront%3E'
publication_type = 'magazine'
needs_subscription = 'optional'
exra_css = """
body{font-family: Arial,Helvetica,sans-serif;}
.print-created{font-size: small;}
.caption{display: block; font-size: x-small;}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
@ -28,13 +35,30 @@ class Thenation(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [ dict(attrs={'class':['print-title','print-created','print-content','print-links']}) ] keep_only_tags = [dict(attrs={'class':['print-title','print-created','print-content','print-links']})]
remove_tags = [dict(name='link')] remove_tags = [dict(name=['link','iframe','base','meta','object','embed'])]
remove_attributes = ['lang']
feeds = [(u"Editor's Picks", u'http://www.thenation.com/rss/editors_picks')] feeds = [(u"Articles", u'http://www.thenation.com/rss/articles')]
def print_version(self, url): def print_version(self, url):
return url.replace('.thenation.com/','.thenation.com/print/') return url.replace('.thenation.com/','.thenation.com/print/')
def preprocess_html(self, soup): def get_browser(self):
return self.adeify_images(soup) br = BasicNewsRecipe.get_browser()
br.open('http://www.thenation.com/')
if self.username is not None and self.password is not None:
br.open(self.login_url)
br.select_form(nr=1)
br['name'] = self.username
br['pass'] = self.password
br.submit()
return br
def get_cover_url(self):
soup = self.index_to_soup('http://www.thenation.com/issue/')
item = soup.find('div',attrs={'id':'cover-wrapper'})
if item:
return item.img['src']
return None
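Because needs_subscription is 'optional', the new get_browser only logs in when credentials are present. The same flow, reduced to bare mechanize for anyone debugging the form indices (the credentials are placeholders):

import mechanize
br = mechanize.Browser()
br.open('http://www.thenation.com/user?destination=%3Cfront%3E')
br.select_form(nr=1)            # nr=1 selects the second form, which the recipe treats as the login form
br['name'] = 'you@example.com'  # placeholder username
br['pass'] = 'secret'           # placeholder password
br.submit()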

View File

@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class UnitedDaily(BasicNewsRecipe):
title = u'聯合新聞網'
oldest_article = 1
max_articles_per_feed = 100
feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
(u'政治', u'http://udn.com/udnrss/politics.xml'),
(u'社會', u'http://udn.com/udnrss/social.xml'),
(u'生活', u'http://udn.com/udnrss/life.xml'),
(u'綜合', u'http://udn.com/udnrss/education.xml'),
(u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
(u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
(u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
(u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
(u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
(u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
(u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
(u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
(u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
(u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
(u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
(u'國際焦點', u'http://udn.com/udnrss/international.xml'),
(u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
(u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
(u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
(u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
(u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
(u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
(u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
(u'房市情報', u'http://udn.com/udnrss/houses.xml'),
(u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
(u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
(u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
(u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
(u'棒球', u'http://udn.com/udnrss/baseball.xml'),
(u'籃球', u'http://udn.com/udnrss/basketball.xml'),
(u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
(u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
(u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
(u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
(u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
(u'電影世界', u'http://udn.com/udnrss/movie.xml'),
(u'流行音樂', u'http://udn.com/udnrss/music.xml'),
(u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
(u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
(u'食樂指南', u'http://udn.com/udnrss/food.xml'),
(u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
(u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
(u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
(u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
(u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
(u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
(u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
(u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
(u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
]
extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}
td[class='story_title'] {font-size:200%; font-weight:bold;}
td[class='story_title']>div {font-size:200%; font-weight:bold;}'''
__author__ = 'Eddie Lau'
__version__ = '1.1'
language = 'zh-TW'
publisher = 'United Daily News Group'
description = 'United Daily (Taiwan)'
category = 'News, Chinese, Taiwan'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
encoding = 'big5'
conversion_options = {'linearize_tables':True}
masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
dict(name='div', attrs={'id':['story_title']}),
dict(name='td', attrs={'class':['story_author']}),
dict(name='div', attrs={'id':['story_author']}),
dict(name='td', attrs={'class':['story']}),
dict(name='div', attrs={'id':['story']}),
]
remove_tags = [dict(name='div', attrs={'id':['mvouter']})]

View File

@ -1,64 +1,75 @@
Removed:

from calibre.web.feeds.news import BasicNewsRecipe

class WashingtonPost(BasicNewsRecipe):

    title = 'Washington Post'
    description = 'US political news'
    __author__ = 'Kovid Goyal'
    use_embedded_content = False
    max_articles_per_feed = 20
    language = 'en'
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True

    feeds = [
        ('Politics', 'http://www.washingtonpost.com/rss/politics'),
        ('Nation', 'http://www.washingtonpost.com/rss/national'),
        ('World', 'http://www.washingtonpost.com/rss/world'),
        ('Business', 'http://www.washingtonpost.com/rss/business'),
        ('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
        ('Sports', 'http://www.washingtonpost.com/rss/sports'),
        ('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
        ('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
        ('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
        ('Local', 'http://www.washingtonpost.com/rss/local'),
        ('Investigations',
            'http://www.washingtonpost.com/rss/investigations'),
    ]

    remove_tags = [
        {'class':lambda x: x and 'article-toolbar' in x},
        {'class':lambda x: x and 'quick-comments' in x},
        {'class':lambda x: x and 'tweet' in x},
        {'class':lambda x: x and 'article-related' in x},
        {'class':lambda x: x and 'hidden' in x.split()},
        {'class':lambda x: x and 'also-read' in x.split()},
        {'class':lambda x: x and 'partners-content' in x.split()},
        {'class':['module share', 'module ads', 'comment-vars', 'hidden',
            'share-icons-wrap', 'comments', 'flipper']},
        {'id':['right-rail', 'save-and-share']},
        {'width':'1', 'height':'1'},
    ]
    keep_only_tags = dict(id=['content', 'article'])

    def get_article_url(self, *args):
        ans = BasicNewsRecipe.get_article_url(self, *args)
        ans = ans.rpartition('?')[0]
        if ans.endswith('_video.html'):
            return None
        if 'ads.pheedo.com' in ans:
            return None
        #if not ans.endswith('_blog.html'):
        #    return None
        return ans

    def print_version(self, url):
        return url.replace('_story.html', '_singlePage.html')

Added:

__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.washingtonpost.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class TheWashingtonPost(BasicNewsRecipe):
    title = 'The Washington Post'
    __author__ = 'Darko Miletic'
    description = 'Leading source for news, video and opinion on politics, business, world and national news, science, travel, entertainment and more. Our local coverage includes reporting on education, crime, weather, traffic, real estate, jobs and cars for DC, Maryland and Virginia. Offering award-winning opinion writing, entertainment information and restaurant reviews.'
    publisher = 'The Washington Post Company'
    category = 'news, politics, USA'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    delay = 1
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.washingtonpost.com/rw/sites/twpweb/img/logos/twp_logo_300.gif'
    cover_url = strftime('http://www.washingtonpost.com/rw/WashingtonPost/Content/Epaper/%Y-%m-%d/Ax1.pdf')
    extra_css = """
        body{font-family: Georgia,serif }
    """

    conversion_options = {
        'comment'   : description
      , 'tags'      : category
      , 'publisher' : publisher
      , 'language'  : language
    }

    keep_only_tags = [dict(attrs={'id':['content','entryhead','entrytext']})]
    remove_tags = [
        dict(name=['meta','link','iframe','base'])
       ,dict(attrs={'id':'multimedia-leaf-page'})
    ]
    remove_attributes = ['lang','property','epochtime','datetitle','pagetype','contenttype','comparetime']

    feeds = [
        (u'World', u'http://feeds.washingtonpost.com/rss/world')
       ,(u'National', u'http://feeds.washingtonpost.com/rss/national')
       ,(u'White House', u'http://feeds.washingtonpost.com/rss/politics/whitehouse')
       ,(u'Business', u'http://feeds.washingtonpost.com/rss/business')
       ,(u'Opinions', u'http://feeds.washingtonpost.com/rss/opinions')
       ,(u'Investigations', u'http://feeds.washingtonpost.com/rss/investigations')
       ,(u'Local', u'http://feeds.washingtonpost.com/rss/local')
       ,(u'Entertainment', u'http://feeds.washingtonpost.com/rss/entertainment')
       ,(u'Sports', u'http://feeds.washingtonpost.com/rss/sports')
       ,(u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins')
       ,(u'Special Reports', u'http://feeds.washingtonpost.com/rss/national/special-reports')
    ]

    def print_version(self, url):
        if '_story.html' in url:
            return url.replace('_story.html','_print.html')
        return url

    def get_article_url(self, article):
        link = BasicNewsRecipe.get_article_url(self, article)
        if not 'washingtonpost.com' in link:
            self.log('Skipping ads:', link)
            return None
        for it in ['_video.html','_gallery.html','_links.html']:
            if it in link:
                self.log('Skipping non-article:', link)
                return None
        return link
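The new print_version only rewrites story pages and leaves everything else untouched; with a hypothetical article URL:

url = 'http://www.washingtonpost.com/world/some-article_story.html'
print url.replace('_story.html', '_print.html')
# -> http://www.washingtonpost.com/world/some-article_print.html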

View File

@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Welt der Physik'
__author__ = 'schuster'
remove_tags_before = [dict(name='div', attrs={'class':'inhalt_bild_text_printonly'})]
remove_tags_after = [dict(name='span', attrs={'class':'clearinhalt_bild'})]
remove_tags = [dict(attrs={'class':['invisible', 'searchfld', 'searchbtn', 'topnavi', 'topsearch']}),
dict(id=['naservice', 'phservicemenu', '',]),
dict(name=['naservice'])]
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'de'
remove_javascript = True
feeds = [(u'Nachrichten und Neuigkeiten', u'http://www.weltderphysik.de/rss/alles.xml')]

53
recipes/ziuaveche.recipe Normal file
View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
ziuaveche.ro
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ZiuaVeche(BasicNewsRecipe):
title = u'Ziua Veche'
__author__ = u'Silviu Cotoar\u0103'
description = 'Cotidian online'
publisher = 'Ziua Veche'
oldest_article = 5
language = 'ro'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
category = 'Ziare,Cotidiane,Stiri'
encoding = 'utf-8'
cover_url = 'http://www.ziuaveche.ro/wp-content/themes/tema/images/zv-logo-alb-old.png'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
}
keep_only_tags = [
dict(name='div', attrs={'id':'singlePost'})
]
remove_tags = [
dict(name='div', attrs={'id':'LikePluginPagelet'})
]
remove_tags_after = [
dict(name='div', attrs={'id':'LikePluginPagelet'})
]
feeds = [
(u'Feeds', u'http://www.ziuaveche.ro/feed/rss')
]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -41,14 +41,20 @@ authors_completer_append_separator = False
#: Author sort name algorithm #: Author sort name algorithm
# The algorithm used to copy author to author_sort # The algorithm used to copy author to author_sort
# Possible values are: # Possible values are:
# invert: use "fn ln" -> "ln, fn" (the default algorithm) # invert: use "fn ln" -> "ln, fn"
# copy : copy author to author_sort without modification # copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert' # comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
# nocomma : "fn ln" -> "ln fn" (without the comma) # nocomma : "fn ln" -> "ln fn" (without the comma)
# When this tweak is changed, the author_sort values stored with each author # When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane, # must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'manage authors', and pressing 'Recalculate all author sort values'. # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled.
author_sort_copy_method = 'comma' author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV',
'Junior', 'Senior')
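For illustration, a toy version of a suffix-aware invert (a sketch of the idea only, not calibre's actual implementation):

suffixes = ('jr', 'sr', 'junior', 'senior')
def invert(name):
    parts = name.split()
    tail = []
    # peel suffixes off the end, ignoring case and trailing periods
    while len(parts) > 1 and parts[-1].rstrip('.').lower() in suffixes:
        tail.insert(0, parts.pop())
    return parts[-1] + ', ' + ' '.join(parts[:-1] + tail)
print invert('John Smith Jr.')   # -> Smith, John Jr.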
#: Use author sort in Tag Browser #: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors, # Set which author field to display in the tags pane (the list of authors,
@ -345,3 +351,11 @@ send_news_to_device_location = "main"
# work on all operating systems) # work on all operating systems)
server_listen_on = '0.0.0.0' server_listen_on = '0.0.0.0'
#: Unified toolbar on OS X
# If you enable this option and restart calibre, the toolbar will be 'unified'
# with the titlebar as is normal for OS X applications. However, doing this has
# various bugs, for instance the minimum width of the toolbar becomes twice
# what it should be and it causes other random bugs on some systems, so turn it
# on at your own risk!
unified_title_toolbar_on_osx = False

View File

@ -3,10 +3,12 @@
"divide": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x / y)\n", "divide": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x / y)\n",
"uppercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.upper()\n", "uppercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return val.upper()\n",
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n", "strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n res = ''\n for i in range(0, len(args)):\n res += args[i]\n return res\n",
"in_list": "def evaluate(self, formatter, kwargs, mi, locals, val, sep, pat, fv, nfv):\n l = [v.strip() for v in val.split(sep) if v.strip()]\n for v in l:\n if re.search(pat, v):\n return fv\n return nfv\n",
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n", "substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n", "ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n if val:\n return val\n else:\n return value_if_empty\n",
"booksize": "def evaluate(self, formatter, kwargs, mi, locals):\n if mi.book_size is not None:\n try:\n return str(mi.book_size)\n except:\n pass\n return ''\n", "booksize": "def evaluate(self, formatter, kwargs, mi, locals):\n if mi.book_size is not None:\n try:\n return str(mi.book_size)\n except:\n pass\n return ''\n",
"select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n if not val:\n return ''\n vals = [v.strip() for v in val.split(',')]\n for v in vals:\n if v.startswith(key+':'):\n return v[len(key)+1:]\n return ''\n", "select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n if not val:\n return ''\n vals = [v.strip() for v in val.split(',')]\n for v in vals:\n if v.startswith(key+':'):\n return v[len(key)+1:]\n return ''\n",
"first_non_empty": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n i = 0\n while i < len(args):\n if args[i]:\n return args[i]\n i += 1\n return ''\n",
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n", "field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n return formatter.get_value(name, [], kwargs)\n",
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n", "subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x - y)\n",
"list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n", "list_item": "def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):\n if not val:\n return ''\n index = int(index)\n val = val.split(sep)\n try:\n return val[index]\n except:\n return ''\n",

View File

@ -12,7 +12,9 @@ is64bit = platform.architecture()[0] == '64bit'
iswindows = re.search('win(32|64)', sys.platform) iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform isosx = 'darwin' in sys.platform
isfreebsd = 'freebsd' in sys.platform isfreebsd = 'freebsd' in sys.platform
islinux = not isosx and not iswindows and not isfreebsd isnetbsd = 'netbsd' in sys.platform
isbsd = isnetbsd or isfreebsd
islinux = not isosx and not iswindows and not isbsd
SRC = os.path.abspath('src') SRC = os.path.abspath('src')
sys.path.insert(0, SRC) sys.path.insert(0, SRC)
sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources') sys.resources_location = os.path.join(os.path.dirname(SRC), 'resources')

View File

@ -11,7 +11,7 @@ __all__ = [
'build', 'build_pdf2xml', 'server', 'build', 'build_pdf2xml', 'server',
'gui', 'gui',
'develop', 'install', 'develop', 'install',
'resources', 'kakasi', 'resources',
'check', 'check',
'sdist', 'sdist',
'manual', 'tag_release', 'manual', 'tag_release',
@ -49,8 +49,9 @@ gui = GUI()
from setup.check import Check from setup.check import Check
check = Check() check = Check()
from setup.resources import Resources from setup.resources import Resources, Kakasi
resources = Resources() resources = Resources()
kakasi = Kakasi()
from setup.publish import Manual, TagRelease, Stage1, Stage2, \ from setup.publish import Manual, TagRelease, Stage1, Stage2, \
Stage3, Stage4, Publish Stage3, Stage4, Publish

View File

@ -11,7 +11,7 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile from PyQt4.pyqtconfig import QtGuiModuleMakefile
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows from setup import Command, islinux, isfreebsd, isbsd, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \ fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \ podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
@ -21,7 +21,7 @@ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \ jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \
icu_lib_dirs icu_lib_dirs
MT MT
isunix = islinux or isosx or isfreebsd isunix = islinux or isosx or isbsd
make = 'make' if isunix else NMAKE make = 'make' if isunix else NMAKE
@ -205,7 +205,7 @@ if islinux:
ldflags.append('-lpython'+sysconfig.get_python_version()) ldflags.append('-lpython'+sysconfig.get_python_version())
if isfreebsd: if isbsd:
cflags.append('-pthread') cflags.append('-pthread')
ldflags.append('-shared') ldflags.append('-shared')
cflags.append('-I'+sysconfig.get_python_inc()) cflags.append('-I'+sysconfig.get_python_inc())

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex import sys, os, textwrap, subprocess, shutil, tempfile, atexit, stat, shlex
from setup import Command, islinux, isfreebsd, basenames, modules, functions, \ from setup import Command, islinux, isfreebsd, isbsd, basenames, modules, functions, \
__appname__, __version__ __appname__, __version__
HEADER = '''\ HEADER = '''\
@ -116,7 +116,7 @@ class Develop(Command):
def pre_sub_commands(self, opts): def pre_sub_commands(self, opts):
if not (islinux or isfreebsd): if not (islinux or isbsd):
self.info('\nSetting up a source based development environment is only ' self.info('\nSetting up a source based development environment is only '
'supported on linux. On other platforms, see the User Manual' 'supported on linux. On other platforms, see the User Manual'
' for help with setting up a development environment.') ' for help with setting up a development environment.')
@ -156,7 +156,7 @@ class Develop(Command):
self.warn('Failed to compile mount helper. Auto mounting of', self.warn('Failed to compile mount helper. Auto mounting of',
' devices will not work') ' devices will not work')
if not isfreebsd and os.geteuid() != 0: if not isbsd and os.geteuid() != 0:
return self.warn('Must be run as root to compile mount helper. Auto ' return self.warn('Must be run as root to compile mount helper. Auto '
'mounting of devices will not work.') 'mounting of devices will not work.')
src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c') src = os.path.join(self.SRC, 'calibre', 'devices', 'linux_mount_helper.c')
@ -168,7 +168,7 @@ class Develop(Command):
ret = p.wait() ret = p.wait()
if ret != 0: if ret != 0:
return warn() return warn()
if not isfreebsd: if not isbsd:
os.chown(dest, 0, 0) os.chown(dest, 0, 0)
os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\ os.chmod(dest, stat.S_ISUID|stat.S_ISGID|stat.S_IRUSR|stat.S_IWUSR|\
stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH) stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH)

View File

@ -30,11 +30,12 @@ int report_libc_error(const char *msg) {
} }
int pyobject_to_int(PyObject *res) { int pyobject_to_int(PyObject *res) {
int ret; PyObject *tmp; int ret = 0; PyObject *tmp;
tmp = PyNumber_Int(res); if (res != NULL) {
if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0; tmp = PyNumber_Int(res);
else ret = (int)PyInt_AS_LONG(tmp); if (tmp == NULL) ret = (PyObject_IsTrue(res)) ? 1 : 0;
else ret = (int)PyInt_AS_LONG(tmp);
}
return ret; return ret;
} }
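In Python terms, the patched C function now behaves roughly like this sketch (res may be NULL, i.e. None, after a failed callback):

def pyobject_to_int(res):
    if res is None:              # NULL result now yields 0 instead of crashing
        return 0
    try:
        return int(res)          # PyNumber_Int succeeded
    except Exception:
        return 1 if res else 0   # fall back to truthiness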

View File

@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn from setup.installer.windows.wix import WixMixIn
OPENSSL_DIR = r'Q:\openssl' OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.7.2' QT_DIR = 'Q:\\Qt\\4.7.3'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns'] QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb' LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll' LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'

View File

@ -11,9 +11,6 @@
SummaryCodepage='1252' /> SummaryCodepage='1252' />
<Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" /> <Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
<!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
<Property Id='REINSTALLMODE' Value='emus'/>
<Upgrade Id="{upgrade_code}"> <Upgrade Id="{upgrade_code}">
<UpgradeVersion Maximum="{version}" <UpgradeVersion Maximum="{version}"
@ -29,6 +26,11 @@
</Upgrade> </Upgrade>
<CustomAction Id="PreventDowngrading" Error="Newer version already installed."/> <CustomAction Id="PreventDowngrading" Error="Newer version already installed."/>
<Property Id="APPLICATIONFOLDER">
<RegistrySearch Id='calibreInstDir' Type='raw'
Root='HKLM' Key="Software\{app}\Installer" Name="InstallPath" />
</Property>
<Directory Id='TARGETDIR' Name='SourceDir'> <Directory Id='TARGETDIR' Name='SourceDir'>
<Merge Id="VCRedist" SourceFile="{crt_msm}" DiskId="1" Language="0"/> <Merge Id="VCRedist" SourceFile="{crt_msm}" DiskId="1" Language="0"/>
<Directory Id='ProgramFilesFolder' Name='PFiles'> <Directory Id='ProgramFilesFolder' Name='PFiles'>
@ -46,6 +48,9 @@
<Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' /> <Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' />
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/> <RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
</Component> </Component>
<Component Id="RememberInstallDir" Guid="*">
<RegistryValue Root="HKLM" Key="Software\{app}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
</Component>
</DirectoryRef> </DirectoryRef>
<DirectoryRef Id="ApplicationProgramsFolder"> <DirectoryRef Id="ApplicationProgramsFolder">
@ -90,7 +95,8 @@
ConfigurableDirectory="APPLICATIONFOLDER"> ConfigurableDirectory="APPLICATIONFOLDER">
<Feature Id="MainApplication" Title="Program Files" Level="1" <Feature Id="MainApplication" Title="Program Files" Level="1"
Description="All the files need to run {app}" Absent="disallow"> Description="All the files needed to run {app}" Absent="disallow">
<ComponentRef Id="RememberInstallDir"/>
</Feature> </Feature>
<Feature Id="VCRedist" Title="Visual C++ 8.0 Runtime" AllowAdvertise="no" Display="hidden" Level="1"> <Feature Id="VCRedist" Title="Visual C++ 8.0 Runtime" AllowAdvertise="no" Display="hidden" Level="1">
@ -118,7 +124,7 @@
<Property Id="ARPPRODUCTICON" Value="main_icon" /> <Property Id="ARPPRODUCTICON" Value="main_icon" />
<Condition <Condition
Message="This application is only supported on Windows XP SP2, or higher."> Message="This application is only supported on Windows XP SP3, or higher.">
<![CDATA[Installed OR (VersionNT >= 501)]]> <![CDATA[Installed OR (VersionNT >= 501)]]>
</Condition> </Condition>
<InstallExecuteSequence> <InstallExecuteSequence>

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob import os, cPickle, re, shutil, marshal, zipfile, glob
from zlib import compress from zlib import compress
from setup import Command, basenames, __appname__ from setup import Command, basenames, __appname__
@ -23,13 +23,114 @@ def get_opts_from_parser(parser):
for o in g.option_list: for o in g.option_list:
for x in do_opt(o): yield x for x in do_opt(o): yield x
class Resources(Command): class Kakasi(Command):
description = 'Compile various needed calibre resources' description = 'Compile resources for unihandecode'
KAKASI_PATH = os.path.join(Command.SRC, __appname__, KAKASI_PATH = os.path.join(Command.SRC, __appname__,
'ebooks', 'unihandecode', 'pykakasi') 'ebooks', 'unihandecode', 'pykakasi')
def run(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.pickle')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'wb'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
with open(out, 'wb') as f:
dic = {}
for k, v in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
cPickle.dump(dic, f, -1)
def clean(self):
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
if os.path.exists(kakasi):
shutil.rmtree(kakasi)
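The Kanwadict is now a single zlib-compressed, marshalled pickle instead of an anydbm database. A sketch of reading one entry back (the resource path and the '4e00' key are illustrative):

import cPickle, marshal
from zlib import decompress
with open('resources/localization/pykakasi/kanwadict2.pickle', 'rb') as f:
    dic = cPickle.load(f)
# keys are '%04x' of the first kanji; values decompress to {kanji: [(yomi, tail)]}
records = marshal.loads(decompress(dic['4e00']))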
class Resources(Command):
description = 'Compile various needed calibre resources'
sub_commands = ['kakasi']
def run(self, opts): def run(self, opts):
scripts = {} scripts = {}
for x in ('console', 'gui'): for x in ('console', 'gui'):
@ -117,108 +218,13 @@ class Resources(Command):
import json import json
json.dump(function_dict, open(dest, 'wb'), indent=4) json.dump(function_dict, open(dest, 'wb'), indent=4)
self.run_kakasi(opts)
def run_kakasi(self, opts):
self.records = {}
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanwadict2.db')
base = os.path.dirname(dest)
if not os.path.exists(base):
os.makedirs(base)
if self.newer(dest, src):
self.info('\tGenerating Kanwadict')
for line in open(src, "r"):
self.parsekdict(line)
self.kanwaout(dest)
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','itaijidict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating Itaijidict')
self.mkitaiji(src, dest)
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
dest = self.j(self.RESOURCES, 'localization',
'pykakasi','kanadict2.pickle')
if self.newer(dest, src):
self.info('\tGenerating kanadict')
self.mkkanadict(src, dest)
return
def mkitaiji(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
dic[pair[0]] = pair[1]
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def mkkanadict(self, src, dst):
dic = {}
for line in open(src, "r"):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
continue
if re.match(r"^$",line):
continue
(alpha, kana) = line.split(' ')
dic[kana] = alpha
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
def parsekdict(self, line):
line = line.decode("utf-8").strip()
if line.startswith(';;'): # skip comment
return
(yomi, kanji) = line.split(' ')
if ord(yomi[-1:]) <= ord('z'):
tail = yomi[-1:]
yomi = yomi[:-1]
else:
tail = ''
self.updaterec(kanji, yomi, tail)
def updaterec(self, kanji, yomi, tail):
key = "%04x"%ord(kanji[0])
if key in self.records:
if kanji in self.records[key]:
rec = self.records[key][kanji]
rec.append((yomi,tail))
self.records[key].update( {kanji: rec} )
else:
self.records[key][kanji]=[(yomi, tail)]
else:
self.records[key] = {}
self.records[key][kanji]=[(yomi, tail)]
def kanwaout(self, out):
dic = anydbm.open(out, 'c')
for (k, v) in self.records.iteritems():
dic[k] = compress(marshal.dumps(v))
dic.close()
def clean(self): def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'): for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle') x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x): if os.path.exists(x):
os.remove(x) os.remove(x)
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi') from setup.commands import kakasi
if os.path.exists(kakasi): kakasi.clean()
shutil.rmtree(kakasi)

View File

@ -12,10 +12,10 @@ from functools import partial
warnings.simplefilter('ignore', DeprecationWarning) warnings.simplefilter('ignore', DeprecationWarning)
from calibre.constants import (iswindows, isosx, islinux, isfreebsd, isfrozen, from calibre.constants import (iswindows, isosx, islinux, isfrozen,
preferred_encoding, __appname__, __version__, __author__, isbsd, preferred_encoding, __appname__, __version__, __author__,
win32event, win32api, winerror, fcntl, win32event, win32api, winerror, fcntl,
filesystem_encoding, plugins, config_dir) filesystem_encoding, plugins, config_dir)
from calibre.startup import winutil, winutilerror from calibre.startup import winutil, winutilerror
if False and islinux and not getattr(sys, 'frozen', False): if False and islinux and not getattr(sys, 'frozen', False):
@ -31,7 +31,7 @@ if False:
# Prevent pyflakes from complaining # Prevent pyflakes from complaining
winutil, winutilerror, __appname__, islinux, __version__ winutil, winutilerror, __appname__, islinux, __version__
fcntl, win32event, isfrozen, __author__ fcntl, win32event, isfrozen, __author__
winerror, win32api, isfreebsd winerror, win32api, isbsd
_mt_inited = False _mt_inited = False
def _init_mimetypes(): def _init_mimetypes():
@ -630,6 +630,24 @@ def human_readable(size):
size = size[:-2] size = size[:-2]
return size + " " + suffix return size + " " + suffix
def remove_bracketed_text(src,
brackets={u'(':u')', u'[':u']', u'{':u'}'}):
from collections import Counter
counts = Counter()
buf = []
src = force_unicode(src)
rmap = dict([(v, k) for k, v in brackets.iteritems()])
for char in src:
if char in brackets:
counts[char] += 1
elif char in rmap:
idx = rmap[char]
if counts[idx] > 0:
counts[idx] -= 1
elif sum(counts.itervalues()) < 1:
buf.append(char)
return u''.join(buf)
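A quick illustration of the new helper (assuming it is imported from this module):

# drops any text inside (), [] or {}, brackets included
print remove_bracketed_text(u'John Smith (Jr.) [ed.]')
# -> u'John Smith  '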
if isosx: if isosx:
import glob, shutil import glob, shutil
fdir = os.path.expanduser('~/.fonts') fdir = os.path.expanduser('~/.fonts')

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 0) numeric_version = (0, 8, 2)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@ -27,7 +27,9 @@ iswindows = 'win32' in _plat or 'win64' in _plat
isosx = 'darwin' in _plat isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False) isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat isfreebsd = 'freebsd' in _plat
islinux = not(iswindows or isosx or isfreebsd) isnetbsd = 'netbsd' in _plat
isbsd = isfreebsd or isnetbsd
islinux = not(iswindows or isosx or isbsd)
isfrozen = hasattr(sys, 'frozen') isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux isunix = isosx or islinux

View File

@ -607,9 +607,22 @@ class StoreBase(Plugin): # {{{
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
author = 'John Schember' author = 'John Schember'
type = _('Store') type = _('Store')
# Information about the store. Should be in the primary language
# of the store. This should not be translatable when set by
# a subclass.
description = _('An ebook store.')
minimum_calibre_version = (0, 8, 0) minimum_calibre_version = (0, 8, 0)
version = (1, 0, 1)
actual_plugin = None actual_plugin = None
# Does the store only distribute ebooks without DRM.
drm_free_only = False
# This is the 2 letter country code for the corporate
# headquarters of the store.
headquarters = ''
# All formats the store distributes ebooks in.
formats = []
def load_actual_plugin(self, gui): def load_actual_plugin(self, gui):
''' '''

View File

@ -1,4 +1,5 @@
import os.path # -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -629,6 +630,7 @@ from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive from calibre.ebooks.metadata.sources.overdrive import OverDrive
from calibre.ebooks.metadata.sources.douban import Douban from calibre.ebooks.metadata.sources.douban import Douban
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban] plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
# }}} # }}}
@ -853,6 +855,17 @@ class ActionStore(InterfaceActionBase):
author = 'John Schember' author = 'John Schember'
actual_plugin = 'calibre.gui2.actions.store:StoreAction' actual_plugin = 'calibre.gui2.actions.store:StoreAction'
def customization_help(self, gui=False):
return 'Customize the behavior of the store search.'
def config_widget(self):
from calibre.gui2.store.config.store import config_widget as get_cw
return get_cw()
def save_settings(self, config_widget):
from calibre.gui2.store.config.store import save_settings as save
save(config_widget)
plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog, plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
ActionConvert, ActionDelete, ActionEditMetadata, ActionView, ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails, ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails,
@ -1093,100 +1106,315 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
# Store plugins {{{ # Store plugins {{{
class StoreAmazonKindleStore(StoreBase): class StoreAmazonKindleStore(StoreBase):
name = 'Amazon Kindle' name = 'Amazon Kindle'
description = _('Kindle books from Amazon') description = u'Kindle books from Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore' actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
drm_free_only = False
headquarters = 'US'
formats = ['KINDLE']
class StoreAmazonDEKindleStore(StoreBase):
name = 'Amazon DE Kindle'
author = 'Charles Haley'
description = u'Kindle Bücher von Amazon.'
actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
drm_free_only = False
headquarters = 'DE'
formats = ['KINDLE']
class StoreAmazonUKKindleStore(StoreBase): class StoreAmazonUKKindleStore(StoreBase):
name = 'Amazon UK Kindle' name = 'Amazon UK Kindle'
description = _('Kindle books from Amazon.uk') author = 'Charles Haley'
description = u'Kindle books from Amazon\'s UK web site. Also, includes French language ebooks.'
actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore' actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
drm_free_only = False
headquarters = 'UK'
formats = ['KINDLE']
class StoreArchiveOrgStore(StoreBase):
name = 'Archive.org'
description = u'An Internet library offering permanent access for researchers, historians, scholars, people with disabilities, and the general public to historical collections that exist in digital format.'
actual_plugin = 'calibre.gui2.store.archive_org_plugin:ArchiveOrgStore'
drm_free_only = True
headquarters = 'US'
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
class StoreBaenWebScriptionStore(StoreBase): class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription' name = 'Baen WebScription'
description = _('Ebooks for readers.') description = u'Sci-Fi & Fantasy brought to you by Jim Baen.'
actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore' actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'LIT', 'LRF', 'MOBI', 'RB', 'RTF', 'ZIP']
class StoreBNStore(StoreBase): class StoreBNStore(StoreBase):
name = 'Barnes and Noble' name = 'Barnes and Noble'
description = _('Books, Textbooks, eBooks, Toys, Games and More.') description = u'The world\'s largest book seller. As the ultimate destination for book lovers, Barnes & Noble.com offers an incredible array of content.'
actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore' actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
drm_free_only = False
headquarters = 'US'
formats = ['NOOK']
class StoreBeamEBooksDEStore(StoreBase):
name = 'Beam EBooks DE'
author = 'Charles Haley'
description = u'Bei uns finden Sie: Tausende deutschsprachige eBooks; Alle eBooks ohne hartes DRM; PDF, ePub und Mobipocket Format; Sofortige Verfügbarkeit - 24 Stunden am Tag; Günstige Preise; eBooks für viele Lesegeräte, PC,Mac und Smartphones; Viele Gratis eBooks'
actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
drm_free_only = True
headquarters = 'DE'
formats = ['EPUB', 'MOBI', 'PDF']
class StoreBeWriteStore(StoreBase): class StoreBeWriteStore(StoreBase):
name = 'BeWrite Books' name = 'BeWrite Books'
description = _('Publishers of fine books.') description = u'Publishers of fine books. Highly selective and editorially driven. Does not offer: books for children or exclusively YA, erotica, swords-and-sorcery fantasy and space-opera-style science fiction. All other genres are represented.'
actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore' actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF']
class StoreDieselEbooksStore(StoreBase): class StoreDieselEbooksStore(StoreBase):
name = 'Diesel eBooks' name = 'Diesel eBooks'
description = _('World Famous eBook Store.') description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore' actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore'
drm_free_only = False
headquarters = 'US'
formats = ['EPUB', 'PDF']
Removed:

class StoreEbookscomStore(StoreBase):
    name = 'eBooks.com'
    description = _('The digital bookstore.')
    actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'

class StoreEHarlequinStoretore(StoreBase):
    name = 'eHarlequin'
    description = _('entertain, enrich, inspire.')
    actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'

class StoreFeedbooksStore(StoreBase):
    name = 'Feedbooks'
    description = _('Read anywhere.')
    actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'

class StoreGutenbergStore(StoreBase):
    name = 'Project Gutenberg'
    description = _('The first producer of free ebooks.')
    actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'

class StoreKoboStore(StoreBase):
    name = 'Kobo'
    description = _('eReading: anytime. anyplace.')
    actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'

class StoreManyBooksStore(StoreBase):
    name = 'ManyBooks'
    description = _('The best ebooks at the best price: free!')
    actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'

class StoreMobileReadStore(StoreBase):
    name = 'MobileRead'
    description = _('Ebooks handcrafted with the utmost care')
    actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'

class StoreOpenLibraryStore(StoreBase):
    name = 'Open Library'
    description = _('One web page for every book.')
    actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'

class StoreSmashwordsStore(StoreBase):
    name = 'Smashwords'
    description = _('Your ebook. Your way.')
    actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'

class StoreWaterstonesUKStore(StoreBase):
    name = 'Waterstones UK'
    description = _('Feel every word')
    actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'

class StoreFoylesUKStore(StoreBase):
    name = 'Foyles UK'
    description = _('Foyles of London, online')
    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'

class AmazonDEKindleStore(StoreBase):
    name = 'Amazon DE Kindle'
    description = _('Kindle eBooks')
    actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'

plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
    StoreBaenWebScriptionStore, StoreBNStore,
    StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
    StoreEHarlequinStoretore, StoreFeedbooksStore,
    StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
    StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
    StoreWaterstonesUKStore]

Added:

class StoreEbookscomStore(StoreBase):
    name = 'eBooks.com'
    description = u'Sells books in multiple electronic formats in all categories. Technical infrastructure is cutting edge, robust and scalable, with servers in the US and Europe.'
    actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'

    drm_free_only = False
    headquarters = 'US'
    formats = ['EPUB', 'LIT', 'MOBI', 'PDF']

class StoreEPubBuyDEStore(StoreBase):
    name = 'EPUBBuy DE'
    author = 'Charles Haley'
    description = u'Bei EPUBBuy.com finden Sie ausschliesslich eBooks im weitverbreiteten EPUB-Format und ohne DRM. So haben Sie die freie Wahl, wo Sie Ihr eBook lesen: Tablet, eBook-Reader, Smartphone oder einfach auf Ihrem PC. So macht eBook-Lesen Spaß!'
    actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'

    drm_free_only = True
    headquarters = 'DE'
    formats = ['EPUB']

class StoreEHarlequinStore(StoreBase):
    name = 'eHarlequin'
    description = u'A global leader in series romance and one of the world\'s leading publishers of books for women. Offers women a broad range of reading from romance to bestseller fiction, from young adult novels to erotic literature, from nonfiction to fantasy, from African-American novels to inspirational romance, and more.'
    actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'

    drm_free_only = False
    headquarters = 'CA'
    formats = ['EPUB', 'PDF']

class StoreFeedbooksStore(StoreBase):
    name = 'Feedbooks'
    description = u'Feedbooks is a cloud publishing and distribution service, connected to a large ecosystem of reading systems and social networks. Provides a variety of genres from independent and classic books.'
    actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'

    drm_free_only = False
    headquarters = 'FR'
    formats = ['EPUB', 'MOBI', 'PDF']

class StoreFoylesUKStore(StoreBase):
    name = 'Foyles UK'
    author = 'Charles Haley'
    description = u'Foyles of London\'s ebook store. Provides extensive range covering all subjects.'
    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'

    drm_free_only = False
    headquarters = 'UK'
    formats = ['EPUB', 'PDF']

class StoreGandalfStore(StoreBase):
    name = 'Gandalf'
    author = u'Tomasz Długosz'
    description = u'Księgarnia internetowa Gandalf.'
    actual_plugin = 'calibre.gui2.store.gandalf_plugin:GandalfStore'

    drm_free_only = False
    headquarters = 'PL'
    formats = ['EPUB', 'PDF']
class StoreGoogleBooksStore(StoreBase):
name = 'Google Books'
description = u'Google Books'
actual_plugin = 'calibre.gui2.store.google_books_plugin:GoogleBooksStore'
drm_free_only = False
headquarters = 'US'
formats = ['EPUB', 'PDF', 'TXT']
class StoreGutenbergStore(StoreBase):
name = 'Project Gutenberg'
description = u'The first producer of free ebooks. Free in the United States because their copyright has expired. They may not be free of copyright in other countries. Readers outside of the United States must check the copyright laws of their countries before downloading or redistributing our ebooks.'
actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'HTML', 'MOBI', 'PDB', 'TXT']
class StoreKoboStore(StoreBase):
name = 'Kobo'
description = u'With over 2.3 million eBooks to browse we have engaged readers in over 200 countries in Kobo eReading. Our eBook listings include New York Times Bestsellers, award winners, classics and more!'
actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
drm_free_only = False
headquarters = 'CA'
formats = ['EPUB']
class StoreLegimiStore(StoreBase):
name = 'Legimi'
author = u'Tomasz Długosz'
description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
actual_plugin = 'calibre.gui2.store.legimi_plugin:LegimiStore'
drm_free_only = False
headquarters = 'PL'
formats = ['EPUB']
class StoreManyBooksStore(StoreBase):
name = 'ManyBooks'
description = u'Public domain and creative commons works from many sources.'
actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'FB2', 'JAR', 'LIT', 'LRF', 'MOBI', 'PDB', 'PDF', 'RB', 'RTF', 'TCR', 'TXT', 'ZIP']
class StoreMobileReadStore(StoreBase):
name = 'MobileRead'
description = u'Ebooks handcrafted with the utmost care.'
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'
drm_free_only = True
headquarters = 'CH'
formats = ['EPUB', 'IMP', 'LRF', 'LIT', 'MOBI', 'PDF']
class StoreNextoStore(StoreBase):
name = 'Nexto'
author = u'Tomasz Długosz'
description = u'Największy w Polsce sklep internetowy z audiobookami mp3, ebookami pdf oraz prasą do pobrania on-line.'
actual_plugin = 'calibre.gui2.store.nexto_plugin:NextoStore'
drm_free_only = False
headquarters = 'PL'
formats = ['EPUB', 'PDF']
class StoreOpenLibraryStore(StoreBase):
name = 'Open Library'
description = u'One web page for every book ever published. The goal is to be a true online library. Over 20 million records from a variety of large catalogs as well as single contributions, with more on the way.'
actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
drm_free_only = True
headquarters = 'US'
formats = ['DAISY', 'DJVU', 'EPUB', 'MOBI', 'PDF', 'TXT']
class StoreOReillyStore(StoreBase):
name = 'OReilly'
description = u'Programming and tech ebooks from OReilly.'
actual_plugin = 'calibre.gui2.store.oreilly_plugin:OReillyStore'
drm_free_only = True
headquarters = 'US'
formats = ['APK', 'DAISY', 'EPUB', 'MOBI', 'PDF']
class StorePragmaticBookshelfStore(StoreBase):
name = 'Pragmatic Bookshelf'
description = u'The Pragmatic Bookshelf\'s collection of programming and tech books avaliable as ebooks.'
actual_plugin = 'calibre.gui2.store.pragmatic_bookshelf_plugin:PragmaticBookshelfStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF']
class StoreSmashwordsStore(StoreBase):
name = 'Smashwords'
description = u'An ebook publishing and distribution platform for ebook authors, publishers and readers. Covers many genres and formats.'
actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'HTML', 'LRF', 'MOBI', 'PDB', 'RTF', 'TXT']
class StoreWaterstonesUKStore(StoreBase):
name = 'Waterstones UK'
author = 'Charles Haley'
description = u'Waterstone\'s mission is to be the leading Bookseller on the High Street and online providing customers the widest choice, great value and expert advice from a team passionate about Bookselling.'
actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
drm_free_only = False
headquarters = 'UK'
formats = ['EPUB', 'PDF']
class StoreWeightlessBooksStore(StoreBase):
name = 'Weightless Books'
description = u'An independent DRM-free ebooksite devoted to ebooks of all sorts.'
actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'
drm_free_only = True
headquarters = 'US'
formats = ['EPUB', 'HTML', 'LIT', 'MOBI', 'PDF']
class StoreWizardsTowerBooksStore(StoreBase):
name = 'Wizards Tower Books'
description = u'A science fiction and fantasy publisher. Concentrates mainly on making out-of-print works available once more as e-books, and helping other small presses exploit the e-book market. Also publishes a small number of limited-print-run anthologies with a view to encouraging diversity in the science fiction and fantasy field.'
actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'
drm_free_only = True
headquarters = 'UK'
formats = ['EPUB', 'MOBI']
class StoreWoblinkStore(StoreBase):
name = 'Woblink'
author = 'Tomasz Długosz'
description = u'Czytanie zdarza się wszędzie!'
actual_plugin = 'calibre.gui2.store.woblink_plugin:WoblinkStore'
drm_free_only = False
headquarters = 'PL'
formats = ['EPUB']
plugins += [
StoreArchiveOrgStore,
StoreAmazonKindleStore,
StoreAmazonDEKindleStore,
StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore,
StoreBNStore,
StoreBeamEBooksDEStore,
StoreBeWriteStore,
StoreDieselEbooksStore,
StoreEbookscomStore,
StoreEPubBuyDEStore,
StoreEHarlequinStore,
StoreFeedbooksStore,
StoreFoylesUKStore,
StoreGandalfStore,
StoreGoogleBooksStore,
StoreGutenbergStore,
StoreKoboStore,
StoreLegimiStore,
StoreManyBooksStore,
StoreMobileReadStore,
StoreNextoStore,
StoreOpenLibraryStore,
StoreOReillyStore,
StorePragmaticBookshelfStore,
StoreSmashwordsStore,
StoreWaterstonesUKStore,
StoreWeightlessBooksStore,
StoreWizardsTowerBooksStore,
StoreWoblinkStore
]
# }}} # }}}
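
All of the store registrations above follow one pattern: subclass StoreBase, point actual_plugin at the module that implements the search, set the DRM/headquarters/formats attributes used by Get Books, and append the class to plugins. A minimal self-contained sketch of that pattern; the store itself is hypothetical, and StoreBase is stubbed so the snippet runs on its own:

    # Stand-in for calibre's StoreBase so the sketch runs standalone;
    # in builtins.py the real base class comes from the plugin system.
    class StoreBase(object):
        name = ''
        author = ''
        description = ''
        actual_plugin = None

    plugins = []

    class StoreExampleStore(StoreBase):
        name = 'Example Store'  # hypothetical store
        description = u'A hypothetical DRM-free ebook store.'
        # Dotted path to the class that implements searching this store
        actual_plugin = 'calibre.gui2.store.example_plugin:ExampleStore'

        drm_free_only = True       # the store only sells DRM-free books
        headquarters = 'US'        # two-letter country code, as above
        formats = ['EPUB', 'PDF']  # formats the store can deliver

    plugins += [StoreExampleStore]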

View File

@@ -253,7 +253,7 @@ class OutputProfile(Plugin):
     periodical_date_in_title = True

     #: Characters used in jackets and catalogs
     missing_char = u'x'
     ratings_char = u'*'
     empty_ratings_char = u' '
     read_char = u'+'
@@ -293,38 +293,38 @@ class iPadOutput(OutputProfile):
         }
     ]

     missing_char = u'\u2715\u200a'  # stylized 'x' plus hair space
     ratings_char = u'\u2605'        # filled star
     empty_ratings_char = u'\u2606'  # hollow star
     read_char = u'\u2713'           # check mark

     touchscreen = True
     # touchscreen_news_css {{{
     touchscreen_news_css = u'''
         /* hr used in articles */
         .article_articles_list {
             width:18%;
         }
         .article_link {
             color: #593f29;
             font-style: italic;
         }
         .article_next {
             -webkit-border-top-right-radius:4px;
             -webkit-border-bottom-right-radius:4px;
             font-style: italic;
             width:32%;
         }
         .article_prev {
             -webkit-border-top-left-radius:4px;
             -webkit-border-bottom-left-radius:4px;
             font-style: italic;
             width:32%;
         }
         .article_sections_list {
             width:18%;
         }
         .articles_link {
             font-weight: bold;
         }
@@ -334,8 +334,8 @@ class iPadOutput(OutputProfile):
         .caption_divider {
             border:#ccc 1px solid;
         }

         .touchscreen_navbar {
             background:#c3bab2;
@@ -357,50 +357,50 @@ class iPadOutput(OutputProfile):
             text-align:center;
         }
         .touchscreen_navbar td a:link {
             color: #593f29;
             text-decoration: none;
         }

         /* Index formatting */
         .publish_date {
             text-align:center;
         }
         .divider {
             border-bottom:1em solid white;
             border-top:1px solid gray;
         }
         hr.caption_divider {
             border-color:black;
             border-style:solid;
             border-width:1px;
         }

         /* Feed summary formatting */
         .article_summary {
             display:inline-block;
         }
         .feed {
             font-family:sans-serif;
             font-weight:bold;
             font-size:larger;
         }
         .feed_link {
             font-style: italic;
         }
         .feed_next {
             -webkit-border-top-right-radius:4px;
             -webkit-border-bottom-right-radius:4px;
             font-style: italic;
             width:40%;
         }
         .feed_prev {
             -webkit-border-top-left-radius:4px;
             -webkit-border-bottom-left-radius:4px;
             font-style: italic;
             width:40%;
         }
@@ -410,24 +410,24 @@ class iPadOutput(OutputProfile):
             font-size: 160%;
         }
         .feed_up {
             font-weight: bold;
             width:20%;
         }
         .summary_headline {
             font-weight:bold;
             text-align:left;
         }
         .summary_byline {
             text-align:left;
             font-family:monospace;
         }
         .summary_text {
             text-align:left;
         }
         '''
     # }}}
@@ -617,8 +617,8 @@ class KindleOutput(OutputProfile):
     supports_mobi_indexing = True
     periodical_date_in_title = False

     missing_char = u'x\u2009'
     empty_ratings_char = u'\u2606'
     ratings_char = u'\u2605'
     read_char = u'\u2713'
@@ -642,8 +642,8 @@ class KindleDXOutput(OutputProfile):
     #comic_screen_size = (741, 1022)
     supports_mobi_indexing = True
     periodical_date_in_title = False

     missing_char = u'x\u2009'
     empty_ratings_char = u'\u2606'
     ratings_char = u'\u2605'
     read_char = u'\u2713'
     mobi_ems_per_blockquote = 2.0

View File

@@ -92,7 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
     config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-    'Overdrive',
+    'Overdrive', 'Douban Books',
 ])

 def is_disabled(plugin):
@@ -216,9 +216,26 @@ def store_plugins():
     customization = config['plugin_customization']
     for plugin in _initialized_plugins:
         if isinstance(plugin, Store):
-            if not is_disabled(plugin):
-                plugin.site_customization = customization.get(plugin.name, '')
-                yield plugin
+            plugin.site_customization = customization.get(plugin.name, '')
+            yield plugin
+
+def available_store_plugins():
+    for plugin in store_plugins():
+        if not is_disabled(plugin):
+            yield plugin
+
+def stores():
+    stores = set([])
+    for plugin in store_plugins():
+        stores.add(plugin.name)
+    return stores
+
+def available_stores():
+    stores = set([])
+    for plugin in available_store_plugins():
+        stores.add(plugin.name)
+    return stores

 # }}}

 # Metadata read/write {{{
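
The split gives callers two views of the same plugin set: store_plugins() now yields every Store with its customization applied, while available_store_plugins() filters out user-disabled ones; stores() and available_stores() return the corresponding name sets. A usage sketch, assuming the functions above live in calibre.customize.ui (an inference from the surrounding code, not confirmed by the diff):

    # Sketch: comparing all registered stores against the enabled ones.
    from calibre.customize.ui import stores, available_stores

    all_names = stores()                # every registered store plugin
    usable_names = available_stores()   # only those not disabled by the user

    # Both helpers return sets, so set arithmetic works directly
    print('Disabled stores:', ', '.join(sorted(all_names - usable_names)))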

View File

@@ -59,7 +59,7 @@ class ANDROID(USBMS):
             0x0489 : { 0xc001 : [0x0226], 0xc004 : [0x0226], },

             # Acer
-            0x502  : { 0x3203 : [0x0100]},
+            0x502  : { 0x3203 : [0x0100, 0x224]},

             # Dell
             0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},
@@ -109,7 +109,7 @@ class ANDROID(USBMS):
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
             '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
-            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
+            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK']

     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']
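
The structure being edited is a nested table: USB vendor id, then product id, then the list of BCD (device revision) values the driver accepts; adding 0x224 to the Acer entry is what makes the new revision match. An illustrative, self-contained version of the lookup, not calibre's actual detection code:

    # Illustrative only: matching a device against a vendor/product/BCD table
    VENDOR_PRODUCT_BCD = {
        0x0502: {0x3203: [0x0100, 0x224]},           # Acer, as patched above
        0x413c: {0xb007: [0x0100, 0x0224, 0x0226]},  # Dell
    }

    def device_matches(vendor_id, product_id, bcd):
        products = VENDOR_PRODUCT_BCD.get(vendor_id, {})
        return bcd in products.get(product_id, [])

    print(device_matches(0x0502, 0x3203, 0x0224))  # True: revision added here
    print(device_matches(0x0502, 0x3203, 0x0300))  # False: unknown revision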

View File

@@ -203,9 +203,11 @@ class ITUNES(DriverBase):
     #  0x1294   iPhone 3GS
     #  0x1297   iPhone 4
     #  0x129a   iPad
-    #  0x12a2   iPad2
+    #  0x129f   iPad2 (WiFi)
+    #  0x12a2   iPad2 (GSM)
+    #  0x12a3   iPad2 (CDMA)
     VENDOR_ID = [0x05ac]
-    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x12a2]
+    PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a2,0x12a3]
     BCD = [0x01]

     # Plugboard ID
@@ -939,7 +941,7 @@ class ITUNES(DriverBase):
         # declared in use_plugboard_ext and a device name of ITUNES
         if DEBUG:
             self.log.info("ITUNES.set_plugboard()")
-            #self.log.info('  using plugboard %s' % plugboards)
+            #self.log.info('  plugboard: %s' % plugboards)
         self.plugboards = plugboards
         self.plugboard_func = pb_func
@@ -1050,7 +1052,6 @@ class ITUNES(DriverBase):
                 'title': metadata[i].title,
                 'uuid': metadata[i].uuid }
-
             # Report progress
             if self.report_progress is not None:
                 self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))
@@ -2742,7 +2743,7 @@ class ITUNES(DriverBase):
         # Update metadata from plugboard
        # If self.plugboard is None (no transforms), original metadata is returned intact
         metadata_x = self._xform_metadata_via_plugboard(metadata, this_book.format)
-
+        self.log("metadata.title_sort: %s  metadata_x.title_sort: %s" % (metadata.title_sort, metadata_x.title_sort))
         if isosx:
             if lb_added:
                 lb_added.name.set(metadata_x.title)
@@ -2752,8 +2753,7 @@ class ITUNES(DriverBase):
                 lb_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                 lb_added.enabled.set(True)
                 lb_added.sort_artist.set(icu_title(metadata_x.author_sort))
-                lb_added.sort_name.set(metadata.title_sort)
+                lb_added.sort_name.set(metadata_x.title_sort)

             if db_added:
                 db_added.name.set(metadata_x.title)
@@ -2763,7 +2763,7 @@ class ITUNES(DriverBase):
                 db_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                 db_added.enabled.set(True)
                 db_added.sort_artist.set(icu_title(metadata_x.author_sort))
-                db_added.sort_name.set(metadata.title_sort)
+                db_added.sort_name.set(metadata_x.title_sort)

             if metadata_x.comments:
                 if lb_added:
@@ -2783,6 +2783,7 @@ class ITUNES(DriverBase):
             # Set genre from series if available, else first alpha tag
             # Otherwise iTunes grabs the first dc:subject from the opf metadata
+            # If title_sort applied in plugboard, that overrides using series/index as title_sort
             if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]:
                 if DEBUG:
                     self.log.info(" ITUNES._update_iTunes_metadata()")
@@ -2794,7 +2795,9 @@ class ITUNES(DriverBase):
                 fraction = index-integer
                 series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
                 if lb_added:
-                    lb_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
+                    # If no title_sort plugboard tweak, create sort_name from series/index
+                    if metadata.title_sort == metadata_x.title_sort:
+                        lb_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
                     lb_added.episode_ID.set(metadata_x.series)
                     lb_added.episode_number.set(metadata_x.series_index)
@@ -2808,7 +2811,9 @@ class ITUNES(DriverBase):
                             break
                 if db_added:
-                    db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
+                    # If no title_sort plugboard tweak, create sort_name from series/index
+                    if metadata.title_sort == metadata_x.title_sort:
+                        db_added.sort_name.set("%s %s" % (self.title_sorter(metadata_x.series), series_index))
                     db_added.episode_ID.set(metadata_x.series)
                     db_added.episode_number.set(metadata_x.series_index)
@@ -2843,7 +2848,7 @@ class ITUNES(DriverBase):
                 lb_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                 lb_added.Enabled = True
                 lb_added.SortArtist = icu_title(metadata_x.author_sort)
-                lb_added.SortName = metadata.title_sort
+                lb_added.SortName = metadata_x.title_sort

             if db_added:
                 db_added.Name = metadata_x.title
@@ -2853,7 +2858,7 @@ class ITUNES(DriverBase):
                 db_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                 db_added.Enabled = True
                 db_added.SortArtist = icu_title(metadata_x.author_sort)
-                db_added.SortName = metadata.title_sort
+                db_added.SortName = metadata_x.title_sort

             if metadata_x.comments:
                 if lb_added:
@@ -2886,7 +2891,9 @@ class ITUNES(DriverBase):
                 fraction = index-integer
                 series_index = '%04d%s' % (integer, str('%0.4f' % fraction).lstrip('0'))
                 if lb_added:
-                    lb_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
+                    # If no title_sort plugboard tweak, create sort_name from series/index
+                    if metadata.title_sort == metadata_x.title_sort:
+                        lb_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
                     lb_added.EpisodeID = metadata_x.series
                     try:
@@ -2912,7 +2919,9 @@ class ITUNES(DriverBase):
                         break
                 if db_added:
-                    db_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
+                    # If no title_sort plugboard tweak, create sort_name from series/index
+                    if metadata.title_sort == metadata_x.title_sort:
+                        db_added.SortName = "%s %s" % (self.title_sorter(metadata_x.series), series_index)
                     db_added.EpisodeID = metadata_x.series
                     try:
@@ -2973,6 +2982,9 @@ class ITUNES(DriverBase):
                         newmi.publisher if book.publisher != newmi.publisher else ''))
                     self.log.info("  tags: %s %s" % (book.tags, ">>> %s" %
                         newmi.tags if book.tags != newmi.tags else ''))
+                else:
+                    self.log("  matching plugboard not found")
         else:
             newmi = book
         return newmi
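
Every SortName hunk above applies the same rule: a title_sort set by a plugboard transform wins, and only an untouched title_sort gets replaced by the series/index form. A condensed, self-contained restatement; the function name is illustrative, and the driver's title_sorter step (which strips leading articles from the series name) is omitted:

    # Condensed restatement of the guard added above; names are illustrative.
    def sort_name_for(original_title_sort, transformed_title_sort,
                      series, series_index):
        if transformed_title_sort != original_title_sort:
            return transformed_title_sort  # plugboard override wins
        if series:
            # Same zero-padded encoding the driver builds for series_index
            integer = int(series_index)
            fraction = series_index - integer
            encoded = '%04d%s' % (integer, ('%0.4f' % fraction).lstrip('0'))
            return '%s %s' % (series, encoded)
        return original_title_sort

    print(sort_name_for('Title', 'Title', 'Foundation', 3.0))
    # -> 'Foundation 0003.0000'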

View File

@@ -95,9 +95,8 @@ class POCKETBOOK360(EB600):
     FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']

-    VENDOR_NAME = 'PHILIPS'
-    WINDOWS_MAIN_MEM = 'MASS_STORGE'
-    WINDOWS_CARD_A_MEM = 'MASS_STORGE'
+    VENDOR_NAME = ['PHILIPS', '__POCKET']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['MASS_STORGE', 'BOOK_USB_STORAGE']

     OSX_MAIN_MEM = 'Philips Mass Storge Media'
     OSX_CARD_A_MEM = 'Philips Mass Storge Media'

View File

@@ -38,7 +38,7 @@ class KOBO(USBMS):
     VENDOR_ID = [0x2237]
     PRODUCT_ID = [0x4161]
-    BCD = [0x0110]
+    BCD = [0x0110, 0x0323]

     VENDOR_NAME = ['KOBO_INC', 'KOBO']
     WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['.KOBOEREADER', 'EREADER']

View File

@@ -8,10 +8,10 @@ from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \
     c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint
 from errno import EBUSY, ENOMEM

-from calibre import iswindows, isosx, isfreebsd, load_library
+from calibre import iswindows, isosx, isbsd, load_library

 _libusb_name = 'libusb'
-PATH_MAX = 511 if iswindows else 1024 if (isosx or isfreebsd) else 4096
+PATH_MAX = 511 if iswindows else 1024 if (isosx or isbsd) else 4096
 if iswindows:
     class Structure(_Structure):
         _pack_ = 1

View File

@@ -269,8 +269,8 @@ class NEXTBOOK(USBMS):
     EBOOK_DIR_MAIN = ''

-    VENDOR_NAME = 'NEXT2'
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = '1.0.14'
+    VENDOR_NAME = ['NEXT2', 'BK7005']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['1.0.14', 'PLAYER']
     SUPPORTS_SUB_DIRS = True
     THUMBNAIL_HEIGHT = 120

View File

@@ -926,8 +926,8 @@ class Device(DeviceConfig, DevicePlugin):
         if not isinstance(template, unicode):
             template = template.decode('utf-8')

         app_id = str(getattr(mdata, 'application_id', ''))
-        # The db id will be in the created filename
-        extra_components = get_components(template, mdata, fname,
+        id_ = mdata.get('id', fname)
+        extra_components = get_components(template, mdata, id_,
                 timefmt=opts.send_timefmt, length=maxlen-len(app_id)-1)
         if not extra_components:
             extra_components.append(sanitize(self.filename_callback(fname,

View File

@@ -68,7 +68,8 @@ def check_command_line_options(parser, args, log):
         raise SystemExit(1)

     output = args[2]
-    if output.startswith('.') and output != '.':
+    if output.startswith('.') and (output != '.' and not
+            output.startswith('..')):
         output = os.path.splitext(os.path.basename(input))[0]+output

     output = os.path.abspath(output)
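
The extra clause matters because '..' also starts with a dot: under the old test, an output argument of '..' would have been expanded into something like 'book..' instead of being treated as the parent directory. A standalone restatement of the patched logic:

    import os

    def resolve_output(input_path, output):
        # Mirrors the patched test: expand a bare extension such as '.mobi'
        # from the input name, but leave '.' and '..' (directories) alone.
        if output.startswith('.') and (output != '.' and not
                output.startswith('..')):
            output = os.path.splitext(os.path.basename(input_path))[0] + output
        return os.path.abspath(output)

    print(resolve_output('/books/war.epub', '.mobi'))  # <cwd>/war.mobi
    print(resolve_output('/books/war.epub', '..'))     # the parent directory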

View File

@@ -103,10 +103,11 @@ class EPUBInput(InputFormatPlugin):
                 t.set('href', guide_cover)
                 t.set('title', 'Title Page')
             from calibre.ebooks import render_html_svg_workaround
-            renderer = render_html_svg_workaround(guide_cover, log)
-            if renderer is not None:
-                open('calibre_raster_cover.jpg', 'wb').write(
-                        renderer)
+            if os.path.exists(guide_cover):
+                renderer = render_html_svg_workaround(guide_cover, log)
+                if renderer is not None:
+                    open('calibre_raster_cover.jpg', 'wb').write(
+                            renderer)

     def find_opf(self):
         def attr(n, attr):

View File

@@ -413,6 +413,13 @@ class EPUBOutput(OutputFormatPlugin):
                 rule.style.removeProperty('margin-left')
                 # padding-left breaks rendering in webkit and gecko
                 rule.style.removeProperty('padding-left')

+        # Change whitespace:pre to pre-line to accommodate readers that
+        # cannot scroll horizontally
+        for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
+            style = rule.style
+            ws = style.getPropertyValue('white-space')
+            if ws == 'pre':
+                style.setProperty('white-space', 'pre-wrap')

     # }}}
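
The same rewrite can be tried on a standalone stylesheet with plain cssutils; rulesOfType above is a calibre helper, so this sketch iterates cssRules and checks rule.type instead:

    import cssutils

    css = 'pre { white-space: pre } p { white-space: normal }'
    sheet = cssutils.parseString(css)

    for rule in sheet.cssRules:
        # Only style rules carry a .style; skip @import, comments, etc.
        if rule.type == rule.STYLE_RULE and \
                rule.style.getPropertyValue('white-space') == 'pre':
            # pre-wrap preserves the whitespace but allows line wrapping,
            # so readers that cannot scroll horizontally stay usable
            rule.style.setProperty('white-space', 'pre-wrap')

    print(sheet.cssText)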

View File

@@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isfreebsd, iswindows
+from calibre.constants import islinux, isbsd, iswindows
 from calibre import unicode_path, as_unicode
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@@ -302,7 +302,7 @@ class HTMLInput(InputFormatPlugin):
         if getattr(self, '_is_case_sensitive', None) is not None:
             return self._is_case_sensitive
         if not path or not os.path.exists(path):
-            return islinux or isfreebsd
+            return islinux or isbsd
         self._is_case_sensitive = not (os.path.exists(path.lower()) \
                 and os.path.exists(path.upper()))
         return self._is_case_sensitive
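
The probe itself generalizes beyond calibre: if both case-variants of an existing path resolve, names are being matched case-insensitively. A self-contained version, with the no-file fallback reduced to a rough platform guess (an assumption, not calibre's islinux/isbsd constants):

    import os
    import sys

    def path_is_case_sensitive(path):
        if not path or not os.path.exists(path):
            # Nothing to probe with; guess from the platform (Linux and the
            # BSDs default to case-sensitive filesystems)
            return sys.platform.startswith(
                ('linux', 'freebsd', 'netbsd', 'openbsd'))
        # If both case-variants of a real path exist, the filesystem is
        # matching names case-insensitively
        return not (os.path.exists(path.lower()) and
                    os.path.exists(path.upper()))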

View File

@@ -7,7 +7,6 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 import os
-import posixpath

 from calibre import guess_type, walk
 from calibre.customize.conversion import InputFormatPlugin
@@ -74,22 +73,23 @@ class HTMLZInput(InputFormatPlugin):
         meta_info_to_oeb_metadata(mi, oeb.metadata, log)

         # Get the cover path from the OPF.
-        cover_href = None
+        cover_path = None
         opf = None
         for x in walk('.'):
             if os.path.splitext(x)[1].lower() in ('.opf'):
                 opf = x
                 break
         if opf:
-            opf = OPF(opf)
-            cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
+            opf = OPF(opf, basedir=os.getcwd())
+            cover_path = opf.raster_cover

         # Set the cover.
-        if cover_href:
+        if cover_path:
             cdata = None
-            with open(cover_href, 'rb') as cf:
+            with open(os.path.join(os.getcwd(), cover_path), 'rb') as cf:
                 cdata = cf.read()
-            id, href = oeb.manifest.generate('cover', cover_href)
-            oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
+            cover_name = os.path.basename(cover_path)
+            id, href = oeb.manifest.generate('cover', cover_name)
+            oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata)
             oeb.guide.add('cover', 'Cover', href)

         return oeb
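
The old relpath arithmetic resolved the cover against the input file's directory rather than the extraction directory; raster_cover instead resolves the OPF's cover reference (which may be a manifest id rather than a plain path) against the basedir handed to the parser. A sketch of the new lookup outside the plugin; the extraction directory is hypothetical, and OPF is assumed to come from calibre.ebooks.metadata.opf2:

    import os
    from calibre.ebooks.metadata.opf2 import OPF  # assumed import path

    extracted = '/tmp/htmlz_extracted'  # hypothetical extraction directory
    opf = OPF(os.path.join(extracted, 'metadata.opf'), basedir=extracted)

    cover_path = opf.raster_cover  # manifest-resolved image path, or None
    if cover_path:
        with open(os.path.join(extracted, cover_path), 'rb') as cf:
            cdata = cf.read()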

Some files were not shown because too many files have changed in this diff.