Merge from trunk

This commit is contained in:
Charles Haley 2012-06-09 15:43:22 +02:00
commit 79ca483efa
146 changed files with 48795 additions and 42140 deletions

View File

@ -19,6 +19,69 @@
# new recipes: # new recipes:
# - title: # - title:
- version: 0.8.55
date: 2012-06-08
new features:
- title: "Add a new 'Calibre style' interface look that is more modern than the default look. You can select it via Preferences->Look & Feel->User interface style."
- title: "New, subtler look for the Tag Browser"
- title: "Driver for Trekstor Pyrus and Pantech Android Tablet"
tickets: [1008946, 1007929]
- title: "Conversion pipeline: Handle guide elements with incorrectly cased hrefs. Also handle guide elements of type coverimagestandard and thumbimagestandard."
- title: "Allow user to customize trekstor plugin to send books into sub directories."
tickets: [1007646]
- title: "EPUB Input: Add support for EPUB files that use the IDPF font obfuscation algorithm. Apparently, people have started producing these now."
tickets: [1008810]
- title: "Save single format to disk: Only show the format available in the selected books."
tickets: [1007287]
bug fixes:
- title: "MOBI Output: When using the insert metadata at start of book option, do not use a table to layout the metadata, as the Kindle Fire crashes when rendering the table."
tickets: [1002119]
- title: "Device detection: Fix a bug that could cause device detection to fail completely if devices with certain vendor/product ids are connected."
tickets: [1009718]
- title: "MOBI Output: When rasterizing svgs only compute style information when an actual svg image is present. Small speedup when converting large svg-free documents to MOBI."
- title: "SONY T1 driver: Fix support for collections of books placed on the SD card"
tickets: [986044]
- title: "Fix partitioning problems in tag browser with fields that have no name, such as identifiers and formats"
- title: "Welcome wizard: Preferentially use the kindle email address set as default when more than one such address exists."
tickets: [1007932 ]
- title: "Fix regression in 0.8.54 that broke the use of the shortcut Alt+A to select books by the same author"
improved recipes:
- Various Polish recipes
- Vice Magazine
- EL Mundo Today
- Haaretz
- Good Housekeeping
- El Pais
- Christian Science Monitor
- Marketing Magazine
- Instapaper
new recipes:
- title: Various Philippine news sources
author: jde
- title: Natemat.pl and wirtualnemedia.pl
author: fenuks
- title: Rabble.ca
author: timtoo
- version: 0.8.54 - version: 0.8.54
date: 2012-05-31 date: 2012-05-31

68
recipes/banat_news.recipe Normal file
View File

@ -0,0 +1,68 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class BanatNews(BasicNewsRecipe):
title = 'Banat News'
custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p')
__author__ = 'jde'
__date__ = '31 May 2012'
__version__ = '1.0'
description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
language = 'ceb'
publisher = 'The Philippine STAR'
category = 'news, Philippines'
tags = 'news, Philippines'
cover_url = 'http://www.philstar.com/images/logo_Banat.jpg'
masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg'
oldest_article = 1.5 #days
max_articles_per_feed = 25
simultaneous_downloads = 10
publication_type = 'newspaper'
timefmt = ' [%a, %d %b %Y %I:%M %p]'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = False
remove_tags = [dict(name='img', attrs={'id':'Image1'}) #Logo
,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'}) #Section (Headlines, Nation, Metro, ...)
,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'}) #Comments
,dict(name='img', attrs={'src':'images/post-comments.jpg'}) #View Comments
,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}) #Zoom
]
conversion_options = { 'title' : custom_title,
'comments' : description,
'tags' : tags,
'language' : language,
'publisher' : publisher,
'authors' : publisher,
'smarten_punctuation' : True
}
feeds = [
('Balita' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101' )
,('Opinyon' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102' )
,('Kalingawan' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104' )
,('Showbiz' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62' )
,('Palaro' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103' )
,('Imong Kapalaran' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105' )
]
# process the printer friendly version of article
def print_version(self, url):
return url.replace('/Article', '/ArticlePrinterFriendly')
# obtain title from printer friendly version of article; avoiding add_toc_thumbnail changing title when article has image
def populate_article_metadata(self, article, soup, first):
article.title = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'}).contents[0].strip()

View File

@ -1,152 +1,110 @@
#!/usr/bin/env python __license__ = 'GPL v3'
__license__ = 'GPL v3' __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
__author__ = 'Kovid Goyal and Sujata Raman, Lorenzo Vigentini' '''
__copyright__ = '2009, Kovid Goyal and Sujata Raman' www.csmonitor.com
__version__ = 'v1.02' '''
__date__ = '10, January 2010'
__description__ = 'Providing context and clarity on national and international news, peoples and cultures'
'''csmonitor.com'''
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class CSMonitor(BasicNewsRecipe):
class ChristianScienceMonitor(BasicNewsRecipe): title = 'The Christian Science Monitor - daily'
__author__ = 'Darko Miletic'
__author__ = 'Kovid Goyal' description = 'The Christian Science Monitor is an international news organization that delivers thoughtful, global coverage via its website, weekly magazine, daily news briefing, and email newsletters.'
description = 'Providing context and clarity on national and international news, peoples and cultures' publisher = 'The Christian Science Monitor'
category = 'news, politics, USA'
cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif' oldest_article = 2
title = 'Christian Science Monitor' max_articles_per_feed = 200
publisher = 'The Christian Science Monitor' no_stylesheets = True
category = 'News, politics, culture, economy, general interest' encoding = 'utf8'
language = 'en'
encoding = 'utf-8'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 16
max_articles_per_feed = 20
use_embedded_content = False use_embedded_content = False
recursion = 10 language = 'en'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
extra_css = """
body{font-family: Arial,Tahoma,Verdana,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
.head {font-family: Georgia,"Times New Roman",Times,serif}
.sByline,.caption{font-size: x-small}
.hide{display: none}
.sLoc{font-weight: bold}
ul{list-style-type: none}
"""
remove_javascript = True conversion_options = {
no_stylesheets = True 'comment' : description
requires_version = (0, 8, 39) , 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
def preprocess_raw_html(self, raw, url): remove_tags = [
try: dict(name=['meta','link','iframe','object','embed'])
from html5lib import parse ,dict(attrs={'class':['podStoryRel','bottom-rel','hide']})
root = parse(raw, namespaceHTMLElements=False, ,dict(attrs={'id':['pgallerycarousel_enlarge','pgallerycarousel_related']})
treebuilder='lxml').getroot() ]
from lxml import etree keep_only_tags = [
for tag in root.xpath( dict(name='h1', attrs={'class':'head'})
'//script|//style|//noscript|//meta|//link|//object'): ,dict(name='h2', attrs={'class':'subhead'})
tag.getparent().remove(tag) ,dict(attrs={'class':['sByline','podStoryGal','ui-body-header','sBody']})
for elem in list(root.iterdescendants(tag=etree.Comment)): ]
elem.getparent().remove(elem) remove_attributes=['xmlns:fb']
ans = etree.tostring(root, encoding=unicode)
ans = re.sub('.*<html', '<html', ans, flags=re.DOTALL)
return ans
except:
import traceback
traceback.print_exc()
raise
def index_to_soup(self, url): feeds = [
raw = BasicNewsRecipe.index_to_soup(self, url, (u'USA' , u'http://rss.csmonitor.com/feeds/usa' )
raw=True).decode('utf-8') ,(u'World' , u'http://rss.csmonitor.com/feeds/world' )
raw = self.preprocess_raw_html(raw, url) ,(u'Politics' , u'http://rss.csmonitor.com/feeds/politics' )
return BasicNewsRecipe.index_to_soup(self, raw) ,(u'Business' , u'http://rss.csmonitor.com/feeds/wam' )
,(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary' )
,(u'Books' , u'http://rss.csmonitor.com/feeds/books' )
,(u'Arts' , u'http://rss.csmonitor.com/feeds/arts' )
,(u'Environment' , u'http://rss.csmonitor.com/feeds/environment')
,(u'Innovation' , u'http://rss.csmonitor.com/feeds/scitech' )
,(u'Living' , u'http://rss.csmonitor.com/feeds/living' )
,(u'Science' , u'http://rss.csmonitor.com/feeds/science' )
,(u'The Culture' , u'http://rss.csmonitor.com/feeds/theculture' )
,(u'The Home Forum', u'http://rss.csmonitor.com/feeds/homeforum' )
,(u'Articles' , u'http://rss.csmonitor.com/feeds/csarticles' )
]
def append_page(self, soup, appendtag, position): def append_page(self, soup):
nav = soup.find('div',attrs={'class':'navigation'}) pager = soup.find('div', attrs={'class':'navigation'})
if nav: if pager:
pager = nav.findAll('a') nexttag = pager.find(attrs={'id':'next-button'})
for part in pager: if nexttag:
if 'Next' in part: nurl = 'http://www.csmonitor.com' + nexttag['href']
nexturl = ('http://www.csmonitor.com' + soup2 = self.index_to_soup(nurl)
re.findall(r'href="(.*?)"', str(part))[0]) texttag = soup2.find(attrs={'class':'sBody'})
soup2 = self.index_to_soup(nexturl) if texttag:
texttag = soup2.find('div', appendtag = soup.find(attrs={'class':'sBody'})
attrs={'class': re.compile('list-article-.*')}) for citem in texttag.findAll(attrs={'class':['podStoryRel','bottom-rel','hide']}):
trash_c = soup2.findAll(attrs={'class': 'list-description'}) citem.extract()
trash_h = soup2.h1 self.append_page(soup2)
for tc in trash_c: tc.extract() texttag.extract()
trash_h.extract() pager.extract()
appendtag.append(texttag)
newpos = len(texttag.contents)
self.append_page(soup2, texttag, newpos)
texttag.extract()
appendtag.insert(position, texttag)
def preprocess_html(self, soup): def preprocess_html(self, soup):
PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*') self.append_page(soup)
html = str(soup) pager = soup.find('div', attrs={'class':'navigation'})
try: if pager:
print_found = PRINT_RE.findall(html) pager.extract()
except Exception: for item in soup.findAll('a'):
pass limg = item.find('img')
if print_found: if item.string is not None:
print_url = 'http://www.csmonitor.com' + print_found[0] str = item.string
print_soup = self.index_to_soup(print_url) item.replaceWith(str)
else: else:
self.append_page(soup, soup.body, 3) if limg:
item.name = 'div'
trash_a = soup.findAll(attrs={'class': re.compile('navigation.*')}) item.attrs = []
trash_b = soup.findAll(attrs={'style': re.compile('.*')}) else:
trash_d = soup.findAll(attrs={'class': 'sByline'}) str = self.tag_to_string(item)
for ta in trash_a: ta.extract() item.replaceWith(str)
for tb in trash_b: tb.extract() for item in soup.findAll('img'):
for td in trash_d: td.extract() if 'scorecardresearch' in item['src']:
item.extract()
print_soup = soup else:
return print_soup if not item.has_key('alt'):
item['alt'] = 'image'
extra_css = ''' return soup
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
.byline{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
.postdate{color:#999999 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
h3{color:#999999 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
.photoCutline{ color:#333333 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
.photoCredit{ color:#999999 ; font-family:Arial,Helvetica,sans-serif ; font-size: x-small; }
#story{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
feeds = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
(u'World' , u'http://rss.csmonitor.com/feeds/world'),
(u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
(u'Money' , u'http://rss.csmonitor.com/feeds/wam'),
(u'Learning' , u'http://rss.csmonitor.com/feeds/learning'),
(u'Living', u'http://rss.csmonitor.com/feeds/living'),
(u'Innovation', u'http://rss.csmonitor.com/feeds/scitech'),
(u'Gardening', u'http://rss.csmonitor.com/feeds/gardening'),
(u'Environment',u'http://rss.csmonitor.com/feeds/environment'),
(u'Arts', u'http://rss.csmonitor.com/feeds/arts'),
(u'Books', u'http://rss.csmonitor.com/feeds/books'),
(u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
]
keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
remove_tags = [
dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
dict(name=['div','a'], attrs={'class':
['storyToolbar cfx','podStoryRel','spacer3',
'divvy spacer7','comment','storyIncludeBottom',
'hide', 'podBrdr']}),
dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
dict(name='form', attrs={'id':[ 'commentform']}) ,
dict(name='div', attrs={'class': ['ui-comments']})
]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
dict(name='div', attrs={'style': [re.compile('.*')]})
]

View File

@ -7,7 +7,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
description = 'News as provided by The Daily Mirror -UK' description = 'News as provided by The Daily Mirror -UK'
__author__ = 'Dave Asbury' __author__ = 'Dave Asbury'
# last updated 28/4/12 # last updated 8/6/12
language = 'en_GB' language = 'en_GB'
#cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg' #cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
@ -28,7 +28,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
dict(name='div',attrs={'class' : 'lead-text'}), dict(name='div',attrs={'class' : 'lead-text'}),
dict(name='div',attrs={'class' : 'styleGroup clearfix'}), dict(name='div',attrs={'class' : 'styleGroup clearfix'}),
dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}), dict(name='div',attrs={'class' : 'widget relatedContents pictures widget-editable viziwyg-section-245 inpage-widget-158123'}),
dict(name='figure',attrs={'class' : 'clearfix'}), # dict(name='figure',attrs={'class' : 'clearfix'}),
dict(name='div',attrs={'class' :'body '}), dict(name='div',attrs={'class' :'body '}),
#dict(attrs={'class' : ['article-attr','byline append-1','published']}), #dict(attrs={'class' : ['article-attr','byline append-1','published']}),
@ -37,6 +37,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(attrs={'class' : ['article sa-teaser type-opinion','image-gallery','gallery-caption']}),
dict(attrs={'class' : 'comment'}), dict(attrs={'class' : 'comment'}),
dict(name='title'), dict(name='title'),
dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}), dict(name='ul',attrs={'class' : 'clearfix breadcrumbs '}),
@ -89,6 +90,3 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
#cover_url = cov2 #cover_url = cov2
#cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png' #cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
return cover_url return cover_url

View File

@ -1,3 +1,4 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElMundoTodayRecipe(BasicNewsRecipe): class ElMundoTodayRecipe(BasicNewsRecipe):
@ -7,11 +8,32 @@ class ElMundoTodayRecipe(BasicNewsRecipe):
category = 'Noticias, humor' category = 'Noticias, humor'
cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png' cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png'
oldest_article = 30 oldest_article = 30
max_articles_per_feed = 30 max_articles_per_feed = 60
auto_cleanup = True auto_cleanup = False
no_stylesheets = True no_stylesheets = True
remove_javascript = True
language = 'es' language = 'es'
use_embedded_content = True use_embedded_content = False
preprocess_regexps = [
(re.compile(r'</title>.*<!--Begin Article Single-->', re.DOTALL),
lambda match: '</title><body>'),
#(re.compile(r'^\t{5}<a href.*Permanent Link to ">$'), lambda match: ''),
#(re.compile(r'\t{5}</a>$'), lambda match: ''),
(re.compile(r'<div class="social4i".*</body>', re.DOTALL),
lambda match: '</body>'),
]
keep_only_tags = [
dict(name='div', attrs={'class':'post-wrapper'})
]
remove_attributes = [ 'href', 'title', 'alt' ]
extra_css = '''
.antetitulo{font-variant:small-caps; font-weight:bold} .articleinfo{font-size:small}
img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto}
'''
feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')] feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')]

View File

@ -10,6 +10,7 @@ class Elektroda(BasicNewsRecipe):
category = 'electronics' category = 'electronics'
language = 'pl' language = 'pl'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets= True
remove_tags_before=dict(name='span', attrs={'class':'postbody'}) remove_tags_before=dict(name='span', attrs={'class':'postbody'})
remove_tags_after=dict(name='td', attrs={'class':'spaceRow'}) remove_tags_after=dict(name='td', attrs={'class':'spaceRow'})
remove_tags=[dict(name='a', attrs={'href':'#top'})] remove_tags=[dict(name='a', attrs={'href':'#top'})]

View File

@ -1,5 +1,6 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.elpais.com www.elpais.com
''' '''
@ -7,23 +8,24 @@ www.elpais.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ElPais_RSS(BasicNewsRecipe): class ElPais_RSS(BasicNewsRecipe):
title = 'El Pais' title = u'El País'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'el periodico global en Castellano' description = u'Noticias de última hora sobre la actualidad en España y el mundo: política, economía, deportes, cultura, sociedad, tecnología, gente, opinión, viajes, moda, televisión, los blogs y las firmas de EL PAÍS. Además especiales, vídeos, fotos, audios, gráficos, entrevistas, promociones y todos los servicios de EL PAÍS.'
publisher = 'EDICIONES EL PAIS, S.L.' publisher = 'EDICIONES EL PAIS, S.L.'
category = 'news, politics, finances, world, spain' category = 'news, politics, finances, world, spain'
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'es' language = 'es'
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://www.elpais.com/im/tit_logo.gif' masthead_url = 'http://ep01.epimg.net/iconos/v1.x/v1.0/logos/cabecera_portada.png'
extra_css = """ extra_css = """
body{font-family: Georgia,"Times New Roman",Times,serif } h1{font-family: Georgia,"Times New Roman",Times,serif }
h3{font-family: Arial,Helvetica,sans-serif} #subtitulo_noticia, .firma, .figcaption{font-size: small}
body{font-family: Arial,Helvetica,Garuda,sans-serif}
img{margin-bottom: 0.4em; display:block} img{margin-bottom: 0.4em; display:block}
""" """
@ -34,49 +36,61 @@ class ElPais_RSS(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [dict(attrs={'class':['cabecera_noticia estirar','cabecera_noticia','','contenido_noticia']})] keep_only_tags = [
remove_tags = [ dict(attrs={'id':['titulo_noticia','subtitulo_noticia']})
dict(name=['meta','link','base','iframe','embed','object']) ,dict(attrs={'class':['firma','columna_texto','entrevista_p_r']})
,dict(attrs={'class':['info_complementa','estructura_2col_der','votos estirar','votos']}) ]
,dict(attrs={'id':'utilidades'}) remove_tags = [
dict(name=['meta','link','base','iframe','embed','object'])
,dict(attrs={'class':'disposicion_vertical'})
] ]
remove_tags_after = dict(attrs={'id':'utilidades'})
remove_attributes = ['lang','border','width','height']
feeds = [ feeds = [
(u'Lo ultimo' , u'http://www.elpais.com/rss/feed.html?feedId=17046') (u'Lo ultimo' , u'http://ep00.epimg.net/rss/tags/ultimas_noticias.xml')
,(u'America Latina' , u'http://www.elpais.com/rss/feed.html?feedId=17041') ,(u'America Latina' , u'http://elpais.com/tag/rss/latinoamerica/a/' )
,(u'Mexico' , u'http://www.elpais.com/rss/feed.html?feedId=17042') ,(u'Mexico' , u'http://elpais.com/tag/rss/mexico/a/' )
,(u'Europa' , u'http://www.elpais.com/rss/feed.html?feedId=17043') ,(u'Europa' , u'http://elpais.com/tag/rss/europa/a/' )
,(u'Estados Unidos' , u'http://www.elpais.com/rss/feed.html?feedId=17044') ,(u'Estados Unidos' , u'http://elpais.com/tag/rss/estados_unidos/a/' )
,(u'Oriente proximo' , u'http://www.elpais.com/rss/feed.html?feedId=17045') ,(u'Oriente proximo' , u'http://elpais.com/tag/rss/oriente_proximo/a/' )
,(u'Espana' , u'http://www.elpais.com/rss/feed.html?feedId=1002' ) ,(u'Andalucia' , u'http://ep00.epimg.net/rss/ccaa/andalucia.xml' )
,(u'Andalucia' , u'http://www.elpais.com/rss/feed.html?feedId=17057') ,(u'Catalunia' , u'http://ep00.epimg.net/rss/ccaa/catalunya.xml' )
,(u'Catalunia' , u'http://www.elpais.com/rss/feed.html?feedId=17059') ,(u'Comunidad Valenciana' , u'http://ep00.epimg.net/rss/ccaa/valencia.xml' )
,(u'Comunidad Valenciana' , u'http://www.elpais.com/rss/feed.html?feedId=17061') ,(u'Madrid' , u'http://ep00.epimg.net/rss/ccaa/madrid.xml' )
,(u'Madrid' , u'http://www.elpais.com/rss/feed.html?feedId=1016' ) ,(u'Pais Vasco' , u'http://ep00.epimg.net/rss/ccaa/paisvasco.xml' )
,(u'Pais Vasco' , u'http://www.elpais.com/rss/feed.html?feedId=17062') ,(u'Galicia' , u'http://ep00.epimg.net/rss/ccaa/galicia.xml' )
,(u'Galicia' , u'http://www.elpais.com/rss/feed.html?feedId=17063') ,(u'Sociedad' , u'http://ep00.epimg.net/rss/sociedad/portada.xml' )
,(u'Opinion' , u'http://www.elpais.com/rss/feed.html?feedId=1003' ) ,(u'Deportes' , u'http://ep00.epimg.net/rss/deportes/portada.xml' )
,(u'Sociedad' , u'http://www.elpais.com/rss/feed.html?feedId=1004' ) ,(u'Cultura' , u'http://ep00.epimg.net/rss/cultura/portada.xml' )
,(u'Deportes' , u'http://www.elpais.com/rss/feed.html?feedId=1007' ) ,(u'Cine' , u'http://elpais.com/tag/rss/cine/a/' )
,(u'Cultura' , u'http://www.elpais.com/rss/feed.html?feedId=1008' ) ,(u'Economía' , u'http://elpais.com/tag/rss/economia/a/' )
,(u'Cine' , u'http://www.elpais.com/rss/feed.html?feedId=17052') ,(u'Literatura' , u'http://elpais.com/tag/rss/libros/a/' )
,(u'Literatura' , u'http://www.elpais.com/rss/feed.html?feedId=17053') ,(u'Musica' , u'http://elpais.com/tag/rss/musica/a/' )
,(u'Musica' , u'http://www.elpais.com/rss/feed.html?feedId=17051') ,(u'Arte' , u'http://elpais.com/tag/rss/arte/a/' )
,(u'Arte' , u'http://www.elpais.com/rss/feed.html?feedId=17060') ,(u'Medio Ambiente' , u'http://elpais.com/tag/rss/medio_ambiente/a/' )
,(u'Tecnologia' , u'http://www.elpais.com/rss/feed.html?feedId=1005' ) ,(u'Tecnologia' , u'http://ep01.epimg.net/rss/tecnologia/portada.xml' )
,(u'Economia' , u'http://www.elpais.com/rss/feed.html?feedId=1006' ) ,(u'Ciencia' , u'http://ep00.epimg.net/rss/tags/c_ciencia.xml' )
,(u'Ciencia' , u'http://www.elpais.com/rss/feed.html?feedId=17068') ,(u'Salud' , u'http://elpais.com/tag/rss/salud/a/' )
,(u'Salud' , u'http://www.elpais.com/rss/feed.html?feedId=17074') ,(u'Ocio' , u'http://elpais.com/tag/rss/ocio/a/' )
,(u'Ocio' , u'http://www.elpais.com/rss/feed.html?feedId=17075') ,(u'Justicia y Leyes' , u'http://elpais.com/tag/rss/justicia/a/' )
,(u'Justicia y Leyes' , u'http://www.elpais.com/rss/feed.html?feedId=17069') ,(u'Guerras y conflictos' , u'http://elpais.com/tag/rss/conflictos/a/' )
,(u'Guerras y conflictos' , u'http://www.elpais.com/rss/feed.html?feedId=17070') ,(u'Politica' , u'http://ep00.epimg.net/rss/politica/portada.xml' )
,(u'Politica' , u'http://www.elpais.com/rss/feed.html?feedId=17073') ,(u'Opinion' , u'http://ep01.epimg.net/rss/politica/opinion.xml' )
] ]
def print_version(self, url): def get_article_url(self, article):
return url + '?print=1' url = BasicNewsRecipe.get_article_url(self, article)
if url and (not('/album/' in url) and not('/futbol/partido/' in url)):
return url
self.log('Skipping non-article', url)
return None
def get_cover_url(self):
soup = self.index_to_soup('http://elpais.com/')
for image in soup.findAll('img'):
if image['src'].endswith('elpaisTodayMiddle.jpg'):
sstr = image['src']
return sstr.replace('elpaisTodayMiddle.jpg', 'elpaisToday.jpg')
return None
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -12,8 +12,8 @@ class Gameplay_pl(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript= True remove_javascript= True
no_stylesheets= True no_stylesheets= True
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})] keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news', 'news_container']})]
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})] remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi', 'news_tagi']}), dict(attrs={'usemap':'#map'}), dict(name='a', attrs={'class':['pin-it-button', 'twitter-share-button']})]
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')] feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
def image_url_processor(self, baseurl, url): def image_url_processor(self, baseurl, url):

View File

@ -8,12 +8,17 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
#auto_cleanup = True
remove_javascript = True remove_javascript = True
def print_version(self,url): def print_version(self,url):
segments = url.split('/') if '/tips-for-making-desserts?' in url:
printURL = '/'.join(segments[0:3]) + '/print-this/' + '/'.join(segments[4:]) return None
return printURL segments = url.split('/')
segments[-1] = segments[-1].split('?')[0]
segments[-1] +='?page=all'
printURL = '/'.join(segments[0:3]) + '/print-this/' + segments[-1]
return printURL
def preprocess_html(self, soup): def preprocess_html(self, soup):
for alink in soup.findAll('a'): for alink in soup.findAll('a'):
@ -22,10 +27,19 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
alink.replaceWith(tstr) alink.replaceWith(tstr)
return soup return soup
feeds = [ (u'Recipes & Entertaining', u'http://www.goodhousekeeping.com/food/food-rss/?src=rss'),
(u'Home & House', u'http://www.goodhousekeeping.com/home/home-rss/?src=rss'), #feeds = [
(u'Diet & Health', u'http://www.goodhousekeeping.com/health/health-rss/?src=rss'), #(u'Food and Recipes', u'http://www.goodhousekeeping.com/rss/recipes/'),
(u'Beauty & Style', u'http://www.goodhousekeeping.com/beauty/beauty-rss/?src=rss'), #]
(u'Family & Pets', u'http://www.goodhousekeeping.com/family/family-rss/?src=rss'),
(u'Saving Money', u'http://www.goodhousekeeping.com/money/money-rss/?src=rss'),
] feeds = [
(u'Food and Recipes', u'http://www.goodhousekeeping.com/rss/recipes/'),
(u'Home and Organizing', u'http://www.goodhousekeeping.com/rss/home/'),
(u'Diet and Health', u'http://www.goodhousekeeping.com/rss/health/'),
(u'Beauty and Anti-Aging', u'http://www.goodhousekeeping.com/rss/beauty/'),
(u'Family and Relationships', u'http://www.goodhousekeeping.com/rss/family/'),
(u'Holidays', u'http://www.goodhousekeeping.com/rss/holidays/'),
(u'In the Test Kitchen', 'http://www.goodhousekeeping.com/rss/test-kitchen-blog/'),
]

View File

@ -12,13 +12,16 @@ class Gram_pl(BasicNewsRecipe):
no_stylesheets= True no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])] remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})] keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')]
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'), feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')] (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
(u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'),
#(u'Kolektyw- Moto Games', u'http://www.motogames.gram.pl/news.rss')
]
def parse_feeds (self): def parse_feeds (self):
feeds = BasicNewsRecipe.parse_feeds(self) feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds: for feed in feeds:
for article in feed.articles[:]: for article in feed.articles[:]:
if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper(): if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper():
@ -56,4 +59,4 @@ class Gram_pl(BasicNewsRecipe):
for a in soup('a'): for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href'] a['href']=self.index + a['href']
return soup return soup

View File

@ -1,13 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class GreenLinux(BasicNewsRecipe):
    """Fetch GreenLinux.pl, a Polish-language e-reader/IT news site, from its
    Feedburner feed and let calibre's auto-cleanup strip the page chrome."""

    # Identification shown in the calibre recipe browser.
    title = u'GreenLinux.pl'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'IT'

    # Static cover artwork for the generated periodical.
    cover_url = 'http://lh5.ggpht.com/_xd_6Y9kXhEc/S8tjyqlfhfI/AAAAAAAAAYU/zFNTp07ZQko/top.png'

    # Download window (days) and per-feed article cap.
    oldest_article = 15
    max_articles_per_feed = 100

    # Rely on calibre's heuristic cleanup rather than hand-written tag rules.
    auto_cleanup = True

    # Single combined feed covering the whole site.
    feeds = [(u'Newsy', u'http://feeds.feedburner.com/greenlinux')]

View File

@ -1,16 +1,15 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.haaretz.com www.haaretz.com
''' '''
import re import re
from calibre import strftime import urllib
from time import gmtime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class HaaretzPrint_en(BasicNewsRecipe): class Haaretz_en(BasicNewsRecipe):
title = 'Haaretz - print edition' title = 'Haaretz'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East." description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
publisher = 'Haaretz' publisher = 'Haaretz'
@ -21,10 +20,16 @@ class HaaretzPrint_en(BasicNewsRecipe):
encoding = 'utf8' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'en_IL' language = 'en_IL'
needs_subscription = True
remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
PREFIX = 'http://www.haaretz.com' PREFIX = 'http://www.haaretz.com'
masthead_url = PREFIX + '/images/logos/logoGrey.gif' masthead_url = PREFIX + '/images/logos/HaaretzLogo.gif'
extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } ' extra_css = """
body{font-family: Verdana,Arial,Helvetica,sans-serif }
h1, .articleBody {font-family: Georgia, serif}
.authorBar {font-size: small}
"""
preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')] preprocess_regexps = [(re.compile(r'</body>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</body></html>')]
@ -44,53 +49,42 @@ class HaaretzPrint_en(BasicNewsRecipe):
feeds = [ feeds = [
(u'News' , PREFIX + u'/print-edition/news' ) (u'Headlines' , 'http://feeds.feedburner.com/haaretz/LBao' )
,(u'Opinion' , PREFIX + u'/print-edition/opinion' ) ,(u'Opinion' , 'http://feeds.feedburner.com/haaretz/opinions' )
,(u'Business' , PREFIX + u'/print-edition/business' ) ,(u'Defence and diplomacy' , 'http://feeds.feedburner.com/DefenseAndDiplomacy' )
,(u'Real estate' , PREFIX + u'/print-edition/real-estate' ) ,(u'National' , 'http://feeds.feedburner.com/haaretz/National' )
,(u'Sports' , PREFIX + u'/print-edition/sports' ) ,(u'International' , 'http://feeds.feedburner.com/InternationalRss' )
,(u'Travel' , PREFIX + u'/print-edition/travel' ) ,(u'Jewish World' , 'http://feeds.feedburner.com/JewishWorldRss' )
,(u'Books' , PREFIX + u'/print-edition/books' ) ,(u'Business' , 'http://feeds.feedburner.com/BusinessPrintRss' )
,(u'Food & Wine' , PREFIX + u'/print-edition/food-wine' ) ,(u'Real Estate' , 'http://feeds.feedburner.com/RealEstatePrintRss' )
,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure' ) ,(u'Features' , 'http://feeds.feedburner.com/FeaturesPrintRss' )
,(u'Features' , PREFIX + u'/print-edition/features' ) ,(u'Arts & Leisure' , 'http://feeds.feedburner.com/ArtsAndLeisureRss' )
,(u'Books' , 'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false')
,(u'Food & Wine' , 'http://feeds.feedburner.com/FoodAndWinePrintRss' )
,(u'Sports' , 'http://feeds.feedburner.com/haaretz/Sport' )
] ]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.PREFIX)
if self.username is not None and self.password is not None:
data = urllib.urlencode({ 'cb':'parseEngReply'
,'newsso':'true'
,'fromlogin':'true'
,'layer':'eng_login'
,'userName':self.username
,'password':self.password
})
br.open('https://sso.haaretz.com/sso/sso/signIn',data)
return br
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
return self.browser.open_novisit(url).geturl()
def print_version(self, url): def print_version(self, url):
article = url.rpartition('/')[2] article = url.rpartition('/')[2]
return 'http://www.haaretz.com/misc/article-print-page/' + article return 'http://www.haaretz.com/misc/article-print-page/' + article
def parse_index(self): def preprocess_raw_html(self, raw, url):
totalfeeds = [] return '<html><head>'+raw[raw.find('</head>'):]
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll(attrs={'class':'text'}):
sp = item.find('span',attrs={'class':'h3 font-weight-normal'})
desc = item.find('p')
description = ''
if sp:
if desc:
description = self.tag_to_string(desc)
link = sp.a
url = self.PREFIX + link['href']
title = self.tag_to_string(link)
times = strftime('%a, %d %b %Y %H:%M:%S +0000',gmtime())
articles.append({
'title' :title
,'date' :times
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -8,15 +8,21 @@ class Historia_org_pl(BasicNewsRecipe):
category = 'history' category = 'history'
language = 'pl' language = 'pl'
oldest_article = 8 oldest_article = 8
remove_empty_feeds=True remove_empty_feeds= True
no_stylesheets = True
use_embedded_content = True
max_articles_per_feed = 100 max_articles_per_feed = 100
feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=rss'), feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=atom'),
(u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=rss'), (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=atom'),
(u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=rss'), (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=atom'),
(u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=rss'), (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=atom'),
(u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=rss'), (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=atom'),
(u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=rss'), (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=atom'),
(u'Rekonstykcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=rss'), (u'Rekonstykcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=atom'),
(u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=rss'), (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=atom'),
(u'Konkursy'), (u'http://www.historia.org.pl/index.php/konkursy.feed?type=rss')] (u'Konkursy'), (u'http://www.historia.org.pl/index.php/konkursy.feed?type=atom')]
def print_version(self, url):
return url + '?tmpl=component&print=1&layout=default&page='

Binary file not shown.

After

Width:  |  Height:  |  Size: 326 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 312 B

View File

@ -15,6 +15,10 @@ class TheIndependentNew(BasicNewsRecipe):
#Flag to enable/disable image fetching (not business) #Flag to enable/disable image fetching (not business)
_FETCH_IMAGES = True _FETCH_IMAGES = True
#Set max gallery images here (respects _FETCH_IMAGES)
# -1 for infinite
_MAX_GALLERY_IMAGES = -1
#used for converting rating to stars #used for converting rating to stars
_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png' _STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
@ -41,6 +45,7 @@ class TheIndependentNew(BasicNewsRecipe):
dict(attrs={'id' : ['RelatedArtTag','renderBiography']}), dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
dict(attrs={'class' : ['autoplay','openBiogPopup']}), dict(attrs={'class' : ['autoplay','openBiogPopup']}),
dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}), dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
dict(name='img',attrs={'alt' : ['view gallery']}),
dict(attrs={'style' : re.compile('.*')}), dict(attrs={'style' : re.compile('.*')}),
] ]
@ -119,15 +124,15 @@ class TheIndependentNew(BasicNewsRecipe):
if len(para.contents) and isinstance(para.contents[0],NavigableString) \ if len(para.contents) and isinstance(para.contents[0],NavigableString) \
and para.contents[0] == 'ADVERTORIAL FEATURE': and para.contents[0] == 'ADVERTORIAL FEATURE':
return None return None
# remove Suggested Topics # remove Suggested Topics
items_to_extract = [] items_to_extract = []
for item in soup.findAll('div',attrs={'class' : re.compile('.*RelatedArtTag.*')}): for item in soup.findAll('div',attrs={'class' : re.compile('.*RelatedArtTag.*')}):
items_to_extract.append(item) items_to_extract.append(item)
for item in items_to_extract: for item in items_to_extract:
item.extract() item.extract()
items_to_extract = [] items_to_extract = []
slideshow_elements = [] slideshow_elements = []
@ -171,25 +176,43 @@ class TheIndependentNew(BasicNewsRecipe):
for item in element.findAll('a',attrs={'href' : re.compile('.*')}): for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
if item.img is not None: if item.img is not None:
#use full size image #use full size image
images = []
img = item.findNext('img') img = item.findNext('img')
img['src'] = item['href'] if not '?action=gallery' in item['href']:
img['src'] = item['href']
#insert caption if available
if img.get('title') and (len(img['title']) > 1):
tag = Tag(soup,'h3') tag = Tag(soup,'h3')
text = NavigableString(img['title']) text = ''
try:
text = img['data-title']
except:
pass
if img.get('title') and (len(img['title']) > 1):
text = NavigableString(img['title'])
tag.insert(0,text) tag.insert(0,text)
images.append((img, tag))
#picture before text else:
gallery_images, remove_link = self._get_gallery_images(item['href'])
images = images + gallery_images
if remove_link:
gal_link = soup.find('a',attrs={'id' : 'view-gallery'})
if gal_link:
gal_link.extract()
img.extract() img.extract()
item.insert(0,img) for (img, title) in images:
item.insert(1,tag) #insert caption if available
if title:
#picture before text
img.extract()
item.insert(0,img)
item.insert(1,title)
# remove link # remove link
item.name = "div" item.name = "div"
item["class"]='image' item["class"]='image'
del item["href"] del item["href"]
#remove empty subtitles #remove empty subtitles
@ -317,13 +340,51 @@ class TheIndependentNew(BasicNewsRecipe):
for item in items_to_extract: for item in items_to_extract:
item.extract() item.extract()
# nickredding's fix for non-justified text # nickredding's fix for non-justified text
for ptag in soup.findAll('p',attrs={'align':'left'}): for ptag in soup.findAll('p',attrs={'align':'left'}):
del(ptag['align']) del(ptag['align'])
return soup return soup
def _get_gallery_images(self,url):
gallery_soup = self.index_to_soup(url)
images = []
remove_link = True
total = 1
try:
counter = gallery_soup.find('div',attrs={'id' : ['counter']})
total = counter.contents[0].split('/')
total = int(total[1].rstrip())
except:
total = 1
if self._MAX_GALLERY_IMAGES >= 0 and total > self._MAX_GALLERY_IMAGES:
total = self._MAX_GALLERY_IMAGES
remove_link = False
for i in range(1, total +1):
image, title = self._get_image_from_gallery(gallery_soup)
if image:
images.append((image,title))
next = url + '&ino=' + str(i + 1)
gallery_soup = self.index_to_soup(next)
images.reverse()
return images, remove_link
def _get_image_from_gallery(self,soup):
try:
container = soup.find('div',attrs={'id' : ['main-image']})
image = container.find('img')
if image:
title = soup.find('div',attrs={'id' : ['image-title']})
return image, title
except:
print 'error fetching gallery image'
return None
def _recurisvely_linearise_tag_tree( def _recurisvely_linearise_tag_tree(
self, self,
item, item,

View File

@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324038402(BasicNewsRecipe): class AdvancedUserRecipe1324038402(BasicNewsRecipe):
title = u'La Gazzetta del Mezzogiorno' title = u'La Gazzetta del Mezzogiorno'
language = 'it'
__author__ = 'faber1971' __author__ = 'faber1971'
description = 'Italian regional magazine - Apulia' description = 'Italian regional magazine - Apulia'
oldest_article = 1 oldest_article = 1

View File

@ -0,0 +1,77 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import time
class MalayaBusinessInsight(BasicNewsRecipe):
    """Calibre recipe for the Malaya Business Insight, a Philippine
    broadsheet, pulling its section RSS feeds from malaya.com.ph."""

    # Base title, plus a download-time-stamped variant used as the book title
    # so successive downloads are distinguishable in the library.
    title = u'Malaya Business Insight'
    custom_title = "Malaya Business Insight - " + time.strftime('%d %b %Y %I:%M %p')

    # Recipe provenance.
    __author__ = 'jde'
    __date__ = '07 June 2012'
    __version__ = '1.2'

    description = "The Malaya Business Insight is a broadsheet newspaper in the Philippines. The newspaper's name was derived from the Filipino word that means 'freedom'."
    language = 'en_PH'
    publisher = 'Malaya Business Insight'
    category = 'news, Philippines'
    tags = 'news, Philippines'

    # Site logo doubles as both cover and masthead.
    cover_url = 'http://www.malaya.com.ph/templates/ja_teline_iv/images/logo.png'
    masthead_url = 'http://www.malaya.com.ph/templates/ja_teline_iv/images/logo.png'

    # Download behaviour.
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Keep only the Joomla main-content container...
    keep_only_tags = [
        dict(name='div', attrs={'id': 'ja-main'}),
    ]
    # ...and drop navigation, sharing and heading clutter inside it.
    remove_tags = [
        dict(name='a', attrs={'class': 'ja-back-btn'}),
        dict(name='li', attrs={'class': 'print-icon'}),
        dict(name='li', attrs={'class': 'email-icon'}),
        dict(name='p', attrs={'class': 'dnn'}),
        dict(name='span', attrs={'class': 'breadcrumbs pathway'}),
        dict(name='dt', attrs={'class': 'article-info-term'}),
        dict(name='div', attrs={'class': 'ja-articles-mainwrap'}),
        dict(name='h1', attrs={'class': 'componentheading'}),
        dict(name='div', attrs={'id': 'ja-content-mass-top'}),
    ]

    # Metadata written into the generated e-book.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        (u'Business', u'http://www.malaya.com.ph/index.php/business?format=feed&amp;type=rss'),
        (u'Market', u'http://www.malaya.com.ph/index.php/business/market?format=feed&amp;type=rss'),
        (u'Shipping and Transportation', u'http://www.malaya.com.ph/index.php/business/shipping-and-transportation?format=feed&amp;type=rss'),
        (u'Business Incidental', u'http://www.malaya.com.ph/index.php/business/business-incidental?format=feed&amp;type=rss'),
        (u'Banking and Finance', u'http://www.malaya.com.ph/index.php/special-features/banking-and-finance?format=feed&amp;type=rss'),
        (u'Motoring', u'http://www.malaya.com.ph/index.php/special-features/motoring?format=feed&amp;type=rss'),
        (u'Info Tech - Telecoms', u'http://www.malaya.com.ph/index.php/special-features/infotech-telecoms?format=feed&amp;type=rss'),
        (u'Property', u'http://www.malaya.com.ph/index.php/special-features/property?format=feed&amp;type=rss'),
        (u'Environment', u'http://www.malaya.com.ph/index.php/special-features/environment?format=feed&amp;type=rss'),
        (u'Agriculture', u'http://www.malaya.com.ph/index.php/special-features/agriculture?format=feed&amp;type=rss'),
        (u'News - National', u'http://www.malaya.com.ph/index.php/news/nation?format=feed&amp;type=rss'),
        (u'News - International', u'http://www.malaya.com.ph/index.php/news/international?format=feed&amp;type=rss'),
        (u'Sports', u'http://www.malaya.com.ph/index.php/sports?format=feed&amp;type=rss'),
        (u'Entertainment', u'http://www.malaya.com.ph/index.php/entertainment?format=feed&amp;type=rss'),
        (u'Living', u'http://www.malaya.com.ph/index.php/living?format=feed&amp;type=rss'),
        (u'Opinion', u'http://www.malaya.com.ph/index.php/opinion?format=feed&amp;type=rss'),
    ]

View File

@ -0,0 +1,54 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import time
class ManilaStandardToday(BasicNewsRecipe):
    """Calibre recipe for the Manila Standard Today, gathering its section
    RSS feeds from the manilastandardtoday.com sub-domains."""

    # Base title plus a timestamped book title so each download is unique.
    title = u'Manila Standard Today'
    custom_title = "Manila Standard Today - " + time.strftime('%d %b %Y %I:%M %p')

    # Recipe provenance.
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'

    description = 'The Manila Standard Today is the fourth-largest broadsheet newspaper in the Philippines as of 2006. Initially established as the Manila Standard, it merged with another newspaper of record, Today, on March 6, 2005. It was the first newspaper merger in the Philippines.'
    language = 'en_PH'
    publisher = 'Manila Standard Today'
    category = 'news, Philippines'
    tags = 'news, Philippines'

    # NOTE(review): this points at a dated front-page scan (June 06 2012) and
    # will go stale; kept as-is to preserve behaviour.
    cover_url = 'http://www.manilastandardtoday.com/wp-content/uploads/Manila-Standard-Today-June-06-12.jpg'
    masthead_url = 'http://www.manilastandardtoday.com/wp-content/uploads/Manila-Standard-Today-June-06-12.jpg'

    # Download behaviour.
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # The article body lives in the WordPress 'main' container.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'main'}),
    ]

    # Metadata written into the generated e-book.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        (u'Headlines', u'http://news.manilastandardtoday.com/feed/'),
        (u'Nation', u'http://news.manilastandardtoday.com/archives/nation/feed/'),
        (u'Business', u'http://business.manilastandardtoday.com/feed/'),
        (u'Metro', u'http://news.manilastandardtoday.com/archives/metro/feed/'),
        (u'Sports', u'http://sports.manilastandardtoday.com/feed/'),
        (u'Entertainment', u'http://entertainment.manilastandardtoday.com/feed/'),
        (u'Opinion', u'http://opinion.manilastandardtoday.com/feed/'),
        (u'Lifestyle', u'http://lifestyle.manilastandardtoday.com/feed/'),
    ]

View File

@ -4,6 +4,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
description = 'News as provide by The Metro -UK' description = 'News as provide by The Metro -UK'
#timefmt = '' #timefmt = ''
__author__ = 'Dave Asbury' __author__ = 'Dave Asbury'
#last update 9/6/12
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg' cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
#no_stylesheets = True #no_stylesheets = True
oldest_article = 1 oldest_article = 1
@ -11,7 +12,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
auto_cleanup = True auto_cleanup = True
encoding = 'UTF-8'
language = 'en_GB' language = 'en_GB'
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif' masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

View File

@ -1,18 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class naczytniki(BasicNewsRecipe):
title = u'naczytniki.pl'
__author__ = 'fenuks'
masthead_url= 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
cover_url = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
language = 'pl'
description ='everything about e-readers'
category='e-readers'
no_stylesheets=True
use_embedded_content=False
oldest_article = 7
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
keep_only_tags=[dict(name='div', attrs={'class':'post'})]
remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
feeds = [(u'Wpisy', u'http://naczytniki.pl/?feed=rss2')]

15
recipes/natemat_pl.recipe Normal file
View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NaTemat(BasicNewsRecipe):
    """Calibre recipe for NaTemat.pl, a Polish news and opinion portal,
    using its single all-articles RSS feed."""

    # Identification.
    title = u'NaTemat.pl'
    __author__ = 'fenuks'
    description = u'informacje, komentarze, opinie'
    category = 'news'
    language = 'pl'

    # Cover artwork for the generated periodical.
    cover_url = 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'

    # Download window and volume limits.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # Keep the main article container; strip buttons and related-article boxes.
    keep_only_tags = [dict(id='main')]
    remove_tags = [
        dict(attrs={'class': ['button', 'block-inside style_default', 'article-related']}),
    ]

    feeds = [(u'Artyku\u0142y', u'http://natemat.pl/rss/wszystkie')]

View File

@ -1,23 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
class AdvancedUserRecipe1306061239(BasicNewsRecipe): class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'New Musical Express Magazine' title = u'New Musical Express Magazine'
__author__ = "scissors" description = 'Author D.Asbury. UK Rock & Pop Mag. '
language = 'en' __author__ = 'Dave Asbury'
# last updated 9/6/12
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 20
cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg' #auto_cleanup = True
language = 'en_GB'
def get_cover_url(self):
soup = self.index_to_soup('http://www.magazinesdirect.com/categories/mens/tv-and-music/')
cov = soup.find(attrs={'title' : 'NME magazine subscriptions'})
cov2 = 'http://www.magazinesdirect.com'+cov['src']
print '***cov = ',cov2,' ***'
cover_url = str(cov2)
# print '**** Cov url =*', cover_url,'***'
#print '**** Cov url =*','http://www.magazinesdirect.com/article_images/articledir_3138/1569221/1_largelisting.jpg','***'
br = browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov2)
cover_url = str(cov2)
except:
cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
return cover_url
masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
remove_tags = [ remove_tags = [
dict( attrs={'class':'clear_icons'}), dict( attrs={'class':'clear_icons'}),
dict( attrs={'class':'share_links'}), dict( attrs={'class':'share_links'}),
dict( attrs={'id':'right_panel'}), dict( attrs={'id':'right_panel'}),
dict( attrs={'class':'today box'}) dict( attrs={'class':'today box'}),
]
]
keep_only_tags = [ keep_only_tags = [
@ -28,7 +52,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
dict(attrs={'class' : 'bPosts'}), dict(attrs={'class' : 'bPosts'}),
dict(attrs={'class' : 'text'}), dict(attrs={'class' : 'text'}),
dict(attrs={'id' : 'article_gallery'}), dict(attrs={'id' : 'article_gallery'}),
#dict(attrs={'class' : 'image'}),
dict(attrs={'class' : 'article_text'}) dict(attrs={'class' : 'article_text'})
] ]
@ -36,7 +62,8 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
feeds = [ feeds = [
(u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'), (u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'),
(u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'), #(u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'),
(u'Blogs', u'http://www.nme.com/blog/index.php?blog=140&tempskin=_rss2'), (u'Reviews',u'http://feed43.com/4138608576351646.xml'),
(u'Bloggs',u'http://feed43.com/3326754333186048.xml'),
] ]

View File

@ -11,7 +11,7 @@ class OCLab(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
keep_only_tags=[dict(id='main')] keep_only_tags=[dict(id='main')]
remove_tags_after= dict(attrs={'class':'single-postmetadata'}) remove_tags_after= dict(attrs={'class':'single-postmetadata'})
remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar']})] remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-enjoy']})]
feeds = [(u'Wpisy', u'http://oclab.pl/feed/')] feeds = [(u'Wpisy', u'http://oclab.pl/feed/')]

View File

@ -0,0 +1,73 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PilipinoStarNgayon(BasicNewsRecipe):
    """Calibre recipe for Pilipino Star Ngayon (philstar.com), a Tagalog
    daily tabloid.

    Downloads the section RSS feeds, converts each article via its
    printer-friendly page, and pulls the article title from the print
    page's header label so add_toc_thumbnail cannot change it.
    """

    # Base title plus a download-time-stamped book title so successive
    # downloads are distinguishable in the library.
    title = 'Pilipino Star Ngayon'
    custom_title = "Pilipino Star Ngayon - " + time.strftime('%d %b %Y %I:%M %p')

    # Recipe provenance.
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'

    description = 'A daily Tabloid written in Tagalog, distributed in the Philippines. A tabloid style newspaper published in the national language - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'tgl'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'

    # Publication logo doubles as cover and masthead.
    cover_url = 'http://www.philstar.com/images/logo_PSN.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_PSN.jpg'

    # Download behaviour.
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Strip site chrome left on the printer-friendly page.
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # section name
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # comments link
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # view-comments image
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # zoom link
    ]

    # Metadata written into the generated e-book.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    feeds = [
        ('Litra-talk', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=535'),
        ('Bansa', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=92'),
        ('Probinsiya', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=49'),
        ('Metro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=93'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=94'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=95'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=96'),
        ('True Confessions', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=97'),
        ('Dr. Love', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=98'),
        ('Kutob', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=99'),
        ('Komiks', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=100'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for an article page."""
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Take the article title from the print page's header label.

        This avoids add_toc_thumbnail changing the title when the article
        carries an image. Guarded so a missing or empty header label on an
        unusual page no longer aborts the whole download with an
        AttributeError/IndexError (the original dereferenced the find()
        result unconditionally).
        """
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        if header is not None and header.contents:
            article.title = header.contents[0].strip()

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class Polska_times(BasicNewsRecipe): class Polska_times(BasicNewsRecipe):
title = u'Polska Times' title = u'Polska Times'
__author__ = 'fenuks' __author__ = 'fenuks'
@ -11,71 +10,20 @@ class Polska_times(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_emty_feeds= True remove_emty_feeds= True
no_stylesheets = True no_stylesheets = True
preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ] #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
keep_only_tags= [dict(id=['tytul-artykulu', 'kontent'])] remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
remove_tags_after= dict(id='material-tagi') remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
remove_tags=[dict(attrs={'id':'reklama_srodtekst_0'}), dict(attrs={'id':'material-tagi'}), dict(name='div', attrs={'class':'zakladki'}), dict(attrs={'title':u'CZYTAJ TAKŻE'}), dict(attrs={'id':'podobne'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/newsletter'})]
feeds = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')] feeds = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]
def print_version(self, url):
return url.replace('artykul', 'drukuj')
def skip_ad_pages(self, soup): def skip_ad_pages(self, soup):
if 'Advertisement' in soup.title: if 'Advertisement' in soup.title:
nexturl=soup.find('a')['href'] nexturl=soup.find('a')['href']
return self.index_to_soup(nexturl, raw=True) return self.index_to_soup(nexturl, raw=True)
    def append_page(self, soup, appendtag):
        """Follow the 'nastepna_strona' (next page) link of a multi-page
        article and append every subsequent page's text to *appendtag*."""
        nexturl=soup.find(id='nastepna_strona')
        while nexturl:
            soup2= self.index_to_soup(nexturl['href'])
            # Grab the next-page link of the freshly fetched page before its
            # body is mutated below.
            nexturl=soup2.find(id='nastepna_strona')
            pagetext = soup2.find(id='tresc')
            # Re-apply this recipe's remove_tags filters to the fetched page
            # (pages fetched here presumably bypass the framework's own
            # cleanup pass — NOTE(review): assumes every remove_tags entry
            # has an 'attrs' key; verify against the class definition).
            for dictionary in self.remove_tags:
                v=pagetext.findAll(attrs=dictionary['attrs'])
                for delete in v:
                    delete.extract()
            # Drop bold "read also" cross-promotion headers (Polish phrases).
            for b in pagetext.findAll(name='b'):
                if b.string:
                    if u'CZYTAJ TEŻ' in b.string or u'Czytaj także' in b.string or u'Czytaj też' in b.string or u'Zobacz także' in b.string:
                        b.extract()
            # Remove centered <h4><a> blocks (related-article link boxes).
            for center in pagetext.findAll(name='center'):
                if center.h4:
                    if center.h4.a:
                        center.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
        # Finally strip pagination widgets from the combined document.
        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
            paginator.extract()
    def image_article(self, soup, appendtag):
        """Walk a photo-gallery article via its <a class="nastepna"> (next)
        links and append each gallery page to *appendtag*."""
        nexturl=soup.find('a', attrs={'class':'nastepna'})
        # Remember already-seen "next" link tags so a gallery whose last page
        # links back to an earlier one cannot loop forever.
        urls=[]
        while nexturl:
            if nexturl not in urls:
                urls.append(nexturl)
            else:
                break
            # The "next" links are relative; resolve against the article base.
            soup2= self.index_to_soup('http://www.polskatimes.pl/artykul/' + nexturl['href'])
            nexturl=soup2.find('a', attrs={'class':'nastepna'})
            if nexturl in urls:
                break;
            pagetext = soup2.find(id='galeria-material')
            pos = len(appendtag.contents)
            appendtag.insert(pos, '<br />')
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
            # Strip gallery navigation and thumbnail-strip containers.
            for rem in appendtag.findAll(attrs={'class':['galeriaNawigator', 'miniaturyPojemnik']}):
                rem.extract()
        # Remove pagination widgets left in the combined document.
        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
            paginator.extract()
def preprocess_html(self, soup):
if soup.find('a', attrs={'class':'nastepna'}):
self.image_article(soup, soup.body)
elif soup.find(id='nastepna_strona'):
self.append_page(soup, soup.body)
return soup
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/') soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
self.cover_url=soup.find(id='pojemnik').img['src'] self.cover_url=soup.find(id='pojemnik').img['src']
return getattr(self, 'cover_url', self.cover_url) return getattr(self, 'cover_url', self.cover_url)

View File

@ -0,0 +1,70 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class Freeman(BasicNewsRecipe):
    """Recipe for The Freeman (Cebu, Philippines), served via philstar.com."""

    title = 'The Freeman'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Freeman - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Freeman is a daily English-language newspaper published in Cebu, Philippines, by the Philippine Star. It was the first newspaper in Cebu, first published in May 1919. The motto of the newspaper is "The fair and fearless" - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [dict(name='img', attrs={'id':'Image1'})  # Logo
                  ,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'})  # Section (Headlines, Nation, Metro, ...)
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'})  # Comments
                  ,dict(name='img', attrs={'src':'images/post-comments.jpg'})  # View Comments
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'})  # Zoom
                  ]

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    feeds = [
              ('Cebu News'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=107' )
             ,('Freeman Opinion'     , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=109' )
             ,('Metro Cebu'          , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=531' )
             ,('Region'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=530' )
             ,('Cebu Business'       , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=108' )
             ,('Cebu Sports'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=110' )
             ,('Cebu Lifestyle'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=111' )
             ,('Cebu Entertainment'  , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=51'  )
            ]

    # process the printer friendly version of article
    def print_version(self, url):
        return url.replace('/Article', '/ArticlePrinterFriendly')

    # obtain title from printer friendly version of article; avoiding
    # add_toc_thumbnail changing title when article has image
    def populate_article_metadata(self, article, soup, first):
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # Guard against site layout changes: keep the feed-supplied title
        # when the header span is missing or empty instead of aborting the
        # whole download with an AttributeError/IndexError.
        if header is not None and header.contents:
            article.title = header.contents[0].strip()

View File

@ -0,0 +1,88 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaBulletin(BasicNewsRecipe):
    """Recipe for The Manila Bulletin (mb.com.ph), the Philippines' largest
    broadsheet by circulation."""
    title = u'The Manila Bulletin'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Manila Bulletin - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = "The Manila Bulletin, (also known as the Bulletin and previously known as the Manila Daily Bulletin and the Bulletin Today) is the Philippines' largest broadsheet newspaper by circulation."
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    oldest_article = 1.5 #days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # Keep only the article container, its label, and the body content.
    keep_only_tags = [
                       dict(name='div', attrs={'class':'article node'})
                      ,dict(name='div', attrs={'class':'label'})
                      ,dict(name='div', attrs={'class':'content clear-block'})
                     ]

    # Strip print/mail widgets, the sidebar and the attachments table.
    remove_tags = [
                    dict(name='li', attrs={'class':'print_html'})
                   ,dict(name='li', attrs={'class':'print_html first'})
                   ,dict(name='li', attrs={'class':'print_mail'})
                   ,dict(name='li', attrs={'class':'print_mail last'})
                   ,dict(name='div', attrs={'class':'article-sidebar'})
                   ,dict(name='table', attrs={'id':'attachments'})
                  ]

    auto_cleanup = False

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    # Commented-out feeds are intentionally disabled, not forgotten.
    feeds = [
              (u'Main News', u'http://www.mb.com.ph/feed/news/main')
#            , (u'Regional', u'http://www.mb.com.ph/feed/news/regional')
            , (u'Business', u'http://www.mb.com.ph/feed/business')
            , (u'Sports', u'http://www.mb.com.ph/feed/sports')
            , (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment')
            , (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion')
#            , (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture')
#            , (u'Environment', u'http://www.mb.com.ph/feed/news/environment')
            , (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology')
            , (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle')
#            , (u'Arts & Living', u'http://www.mb.com.ph/feed/lifestyle/arts-and-living')
#            , (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive')
#            , (u'Food', u'http://www.mb.com.ph/feed/lifestyle/food')
#            , (u'Travel', u'http://www.mb.com.ph/feed/lifestyle/travel')
#            , (u'Picture Perfect', u'http://www.mb.com.ph/feed/lifestyle/picture-perfect')
            ]

    # if use print version - convert url
    # http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    # http://www.mb.com.ph/print/361252
    #
#    def print_version(self,url):
#        segments = url.split('/')
#        printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
#        return printURL

View File

@ -0,0 +1,55 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaTimes(BasicNewsRecipe):
    """Recipe for The Manila Times (manilatimes.net), the oldest English
    language newspaper in the Philippines."""
    title = u'The Manila Times'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Manila Times - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = 'The Manila Times is the oldest existing English language newspaper in the Philippines.'
    language = 'en_PH'
    publisher = 'The Manila Times'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'
    masthead_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'
    oldest_article = 1.5 #days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # Strip print/email icons and the hit counter; the rest of the page is
    # handled by auto_cleanup below.
    remove_tags = [
                    dict(name='img', attrs={'alt':'Print'})
                   ,dict(name='img', attrs={'alt':'Email:'})
                   ,dict(name='dd', attrs={'class':'hits'})
                  ]

    auto_cleanup = True

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    # NOTE(review): the feed URLs use '&amp;' rather than '&' — they look
    # HTML-escaped; verify the site accepts them before "fixing".
    feeds = [(u'Breaking News', u'http://www.manilatimes.net/index.php/news/breaking-news?format=feed&amp;type=rss'), (u'Top Stories', u'http://www.manilatimes.net/index.php/news/top-stories?format=feed&amp;type=rss'), (u'Headlines', u'http://www.manilatimes.net/index.php/news/headlines-mt?format=feed&amp;type=rss'), (u'Nation', u'http://www.manilatimes.net/index.php/news/nation?format=feed&amp;type=rss'), (u'Regions', u'http://www.manilatimes.net/index.php/news/regions?format=feed&amp;type=rss'), (u'World', u'http://www.manilatimes.net/index.php/news/world?format=feed&amp;type=rss'), (u'Top Business News', u'http://www.manilatimes.net/index.php/business/top-business-news?format=feed&amp;type=rss'), (u'Business Columnist', u'http://www.manilatimes.net/index.php/business/business-columnist?format=feed&amp;type=rss'), (u'Opinion - Editorials', u'http://www.manilatimes.net/index.php/opinion/editorials?format=feed&amp;type=rss'), (u'Opinion - Columnist', u'http://www.manilatimes.net/index.php/opinion/columnist1?format=feed&amp;type=rss'), (u'Opinion - Editorial Cartoon', u'http://www.manilatimes.net/index.php/opinion/editorial-cartoon?format=feed&amp;type=rss'), (u'Top Sports News', u'http://www.manilatimes.net/index.php/sports/top-sports-news?format=feed&amp;type=rss'), (u'Sports Columnist', u'http://www.manilatimes.net/index.php/sports/sports-columnist?format=feed&amp;type=rss'), (u'Life & Times', u'http://www.manilatimes.net/index.php/life-and-times?format=feed&amp;type=rss'), (u'Showtime', u'http://www.manilatimes.net/index.php/life-and-times/showtime?format=feed&amp;type=rss'), (u'Sunday Times', u'http://www.manilatimes.net/index.php/sunday-times?format=feed&amp;type=rss'), (u'Sunday Times Magazine', u'http://www.manilatimes.net/index.php/sunday-times/the-sunday-times-magazines?format=feed&amp;type=rss'), (u'Motoring News', u'http://www.manilatimes.net/index.php/fast-times/motoring-news?format=feed&amp;type=rss'), (u'Motoring Columnist',
        u'http://www.manilatimes.net/index.php/fast-times/motoring-columnist?format=feed&amp;type=rss'), (u'Technology', u'http://www.manilatimes.net/index.php/technology?format=feed&amp;type=rss')]

View File

@ -0,0 +1,129 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PhilippineDailyInquirer(BasicNewsRecipe):
    """Recipe for The Philippine Daily Inquirer (inquirer.net)."""
    title = 'The Philippine Daily Inquirer'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Philippine Daily Inquirer - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '03 June 2012'
    __version__ = '1.0'
    description = 'The Philippine Daily Inquirer is a widely read and circulated newspaper.'
    language = 'en_PH'
    publisher = 'The Philippine Daily Inquirer'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.inquirer.com.ph/assets/bg/logo.jpg'
    masthead_url = 'http://www.inquirer.com.ph/assets/bg/logo.jpg'
    oldest_article = 1.5 #days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Cut everything after the article meta / taboola widget / clear break.
    remove_tags_after = [
                          dict(name='div', attrs={'id':'entryMeta'})
                         ,dict(name='div', attrs={'id':'taboola-div'})
                         ,dict(name='br', attrs={'class':'clear'})
                        ]

    # Strip navigation, sharing widgets and ad-redirect links.
    remove_tags = [
                    dict(name='div', attrs={'class':'recent'})
                   ,dict(name='div', attrs={'id':'sharefeature'})
                   ,dict(name='div', attrs={'id':'masthead_bg'})
                   ,dict(name='div', attrs={'id':'navmenu_main'})
                   ,dict(name='div', attrs={'id':'navmenu_channel'})
                   ,dict(name='div', attrs={'class':'breadcrumbs'})
                   ,dict(name='div', attrs={'id':'search_container'})
                   ,dict(name='a', attrs={'href':'http://ruby.inquirer.net/redirect/redirect.php?item_id=1143'})
                   ,dict(name='a', attrs={'href':'http://ruby.inquirer.net/redirect/redirect.php?item_id=1147'})
                  ]

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    # One RSS feed per site section; commented entries are deliberately off.
    feeds = [
              ('Headlines' , 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed' )
             ,('Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/feed' )
             ,('Nation' , 'http://newsinfo.inquirer.net/category/nation/feed' )
             ,('Nation - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed' )
             ,('Metro' , 'http://newsinfo.inquirer.net/category/metro/feed' )
             ,('Metro - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed' )
             ,('Regions' , 'http://newsinfo.inquirer.net/category/regions/feed' )
             ,('Regions - Latest Stories' , 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed' )
#             ,('News' , 'http://www.inquirer.net/fullfeed' )
#             ,('More News' , 'http://newsinfo.inquirer.net/feed' )
             ,('Global Nation' , 'http://globalnation.inquirer.net/feed' )
             ,('Global Nation - Latest Stories' , 'http://globalnation.inquirer.net/category/latest-stories/feed' )
             ,('Global Nation - Philippines' , 'http://globalnation.inquirer.net/category/news/philippines/feed' )
             ,('Global Nation - Asia & Pacific' , 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed' )
             ,('Global Nation - Americas' , 'http://globalnation.inquirer.net/category/news/uscanada/feed' )
             ,('Global Nation - Middle East & Africa' , 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed' )
             ,('Global Nation - Europe' , 'http://globalnation.inquirer.net/category/news/europe/feed' )
             ,('Global Nation - Global Pinoy' , 'http://globalnation.inquirer.net/category/global-pinoy/feed' )
             ,('Global Nation - Events' , 'http://globalnation.inquirer.net/category/events/feed' )
             ,('Business' , 'http://business.inquirer.net/feed' )
             ,('Business - Latest Stories' , 'http://business.inquirer.net/category/latest-stories/feed' )
             ,('Business - Money' , 'http://business.inquirer.net/category/money/feed' )
             ,('Business - Science & Health' , 'http://business.inquirer.net/category/science-and-health/feed' )
             ,('Business - Motoring' , 'http://business.inquirer.net/category/motoring/feed' )
             ,('Business - Property Guide' , 'http://business.inquirer.net/category/property-guide/feed' )
             ,('Business - Columnists' , 'http://business.inquirer.net/category/columnists/feed' )
             ,('Sports' , 'http://sports.inquirer.net/feed' )
             ,('Sports - Latest Stories' , 'http://sports.inquirer.net/category/latest-stories/feed' )
             ,('Sports - Basketball' , 'http://sports.inquirer.net/category/section/basketball/feed' )
             ,('Sports - Boxing & MMA' , 'http://sports.inquirer.net/category/section/boxing-mma/feed' )
             ,('Sports - Golf' , 'http://sports.inquirer.net/category/section/golf/feed' )
             ,('Sports - Football' , 'http://sports.inquirer.net/category/section/other-sports/football/feed' )
             ,('Sports - Other Sports' , 'http://sports.inquirer.net/category/section/other-sports/feed' )
             ,('Technology' , 'http://technology.inquirer.net/feed' )
             ,('Technology Latest Stories' , 'http://technology.inquirer.net/category/latest-stories/feed' )
             ,('Entertainment' , 'http://entertainment.inquirer.net/feed' )
             ,('Entertainment - Headlines' , 'http://entertainment.inquirer.net/category/headlines/feed' )
             ,('Entertainment - Latest Stories' , 'http://entertainment.inquirer.net/category/latest-stories/feed' )
             ,('Entertainment - Movies' , 'http://movies.inquirer.net/feed' )
             ,('Lifestyle' , 'http://lifestyle.inquirer.net/feed' )
             ,('Lifestyle - Latest Stories' , 'http://lifestyle.inquirer.net/category/latest-stories/feed' )
             ,('Lifestyle - Arts & Books' , 'http://lifestyle.inquirer.net/category/arts-and-books/feed' )
             ,('Lifestyle - Wellness' , 'http://lifestyle.inquirer.net/category/wellness/feed' )
             ,('Lifestyle - Home & Entertaining' , 'http://lifestyle.inquirer.net/category/home-and-entertaining/feed' )
             ,('Lifestyle - Parenting' , 'http://lifestyle.inquirer.net/category/parenting/feed' )
             ,('Lifestyle - Food' , 'http://lifestyle.inquirer.net/category/food/feed' )
             ,('Lifestyle - Fashion & Beauty' , 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed' )
             ,('Lifestyle - Super' , 'http://lifestyle.inquirer.net/category/super/feed' )
             ,('Lifestyle - 2BU' , 'http://lifestyle.inquirer.net/category/2bu/feed' )
             ,('Lifestyle - Sunday Lifestyle' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed' )
             ,('Lifestyle - Wedding' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed' )
             ,('Lifestyle - Travel' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed' )
             ,('Lifestyle - Relationship' , 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed' )
             ,('Opinion' , 'http://opinion.inquirer.net/feed' )
             ,('Opinion - Viewpoints' , 'http://opinion.inquirer.net/category/viewpoints/feed' )
             ,('Opinion - Talk of the Town' , 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed' )
             ,('Editorial' , 'http://opinion.inquirer.net/category/editorial/feed' )
             ,('Letters to the Editor' , 'http://opinion.inquirer.net/category/letters-to-the-editor/feed' )
             ,('Columns' , 'http://opinion.inquirer.net/category/columns/feed' )
             ,('Citizens Journalism' , 'http://newsinfo.inquirer.net/category/citizens-journalism/feed' )
             ,('Cebu - Daily News' , 'http://newsinfo.inquirer.net/category/cdn/feed' )
             ,('Cebu - More News' , 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed' )
             ,('Cebu - Community' , 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed' )
             ,('Cebu - Metro' , 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed' )
             ,('Cebu - Business' , 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed' )
             ,('Cebu - Sports' , 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed' )
             ,('Cebu - Visayas' , 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed' )
             ,('Cebu - Opinion' , 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed' )
            ]

View File

@ -0,0 +1,97 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PhilippineStar(BasicNewsRecipe):
    """Recipe for The Philippine Star (philstar.com)."""

    title = 'The Philippine Star'
    # Timestamped title so successive downloads are distinguishable.
    custom_title = "The Philippine Star - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Philippine Star is a daily English-language broadsheet newspaper based in Manila. It has the most subscribers of any newspaper in the Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'
    masthead_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    remove_tags = [dict(name='img', attrs={'id':'Image1'})  # Logo
                  ,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'})  # Section (Headlines, Nation, Metro, ...)
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'})  # Comments
                  ,dict(name='img', attrs={'src':'images/post-comments.jpg'})  # View Comments
                  ,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'})  # Zoom
                  ]

    conversion_options = { 'title'               : custom_title,
                           'comments'            : description,
                           'tags'                : tags,
                           'language'            : language,
                           'publisher'           : publisher,
                           'authors'             : publisher,
                           'smarten_punctuation' : True
                         }

    feeds = [
              ('Headlines'             , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=63'  )
             ,('Breaking News'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=200' )
             ,('News Feature'          , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=68'  )
             ,('Nation'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=67'  )
             ,('Metro'                 , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=65'  )
             ,('Business'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=66'  )
             ,('Sports'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=69'  )
             ,('Entertainment'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=70'  )
             ,('Science & Technology'  , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=75'  )
             ,('Networks'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=71'  )
             ,('Business as Usual'     , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=78'  )
             ,('Banking'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=74'  )
             ,('Motoring'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=72'  )
             ,('Real Estate'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=76'  )
             ,('Telecoms'              , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=73'  )
             ,('Agriculture'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=77'  )
             ,('Arts & Culture'        , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=79'  )
             ,('Food & Leisure'        , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=81'  )
             ,('Health & Family'       , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=80'  )
             ,('Education & Home'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=442' )
             ,('Travel & Tourism'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=87'  )
             ,('Newsmakers'            , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=88'  )
             ,('Business Life'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=82'  )
             ,('Fashion & Beauty'      , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=83'  )
             ,('For Men'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=446' )
             ,('Gadgets'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=449' )
             ,('Sunday Life'           , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=86'  )
             ,('Supreme'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=448' )
             ,('Opinion'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=64'  )
             ,('Letters to the Editor' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=135' )
             ,('Starweek Magazine'     , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=90'  )
             ,('Modern Living'         , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=85'  )
             ,('YStyle'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=451' )
             ,('Allure'                , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=89'  )
             ,('Weather'               , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=116' )
            ]

    # process the printer friendly version of article
    def print_version(self, url):
        return url.replace('/Article', '/ArticlePrinterFriendly')

    # obtain title from printer friendly version of article; avoiding
    # add_toc_thumbnail changing title when article has image
    def populate_article_metadata(self, article, soup, first):
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # Guard against site layout changes: keep the feed-supplied title
        # when the header span is missing or empty instead of aborting the
        # whole download with an AttributeError/IndexError.
        if header is not None and header.contents:
            article.title = header.contents[0].strip()

View File

@ -13,10 +13,11 @@ class tvn24(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
keep_only_tags=[dict(id='tvn24_wiadomosci_detal'), dict(name='h1', attrs={'class':'standardHeader1'}), dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']})] keep_only_tags=[dict(name='h1', attrs={'class':'standardHeader1'}), dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']}), dict(attrs={'class':'mainLeftColumn'})]
remove_tags_after= dict(name='div', attrs={'class':'socialBoxesBottom'}) remove_tags=[dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text']})]
remove_tags=[dict(attrs={'class':['tagi_detal', 'socialBoxesBottom', 'twitterBox', 'commentsInfo', 'textSize', 'obj_ukrytydruk obj_ramka1_r', 'related newsNews align-right', 'box', 'newsUserList', 'watchMaterial text']})] #remove_tags_after= dict(attrs={'class':'articleAuthors mb30 mt5 grey_v6'})
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), (u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')] feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ]
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -1,4 +1,5 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ViceESRecipe(BasicNewsRecipe): class ViceESRecipe(BasicNewsRecipe):
@ -7,11 +8,33 @@ class ViceESRecipe(BasicNewsRecipe):
description = u'La página web oficial de la revista Vice España' description = u'La página web oficial de la revista Vice España'
category = u'noticias, fotografía, blogs, moda, arte, cine, música, literatura, tecnología' category = u'noticias, fotografía, blogs, moda, arte, cine, música, literatura, tecnología'
cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif' cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif'
oldest_article = 20 oldest_article = 14
max_articles_per_feed = 30 max_articles_per_feed = 100
auto_cleanup = True auto_cleanup = False
no_stylesheets = True no_stylesheets = True
language = 'es' language = 'es'
use_embedded_content = False
remove_javascript = True
publication_type = 'magazine'
recursions=10
match_regexps = [r'/read/.*\?Contentpage=[2-9]$']
keep_only_tags = [
dict(attrs={'class':['article_title','article_content','next']})
]
remove_tags = [
dict(attrs={'class':['social_buttons','search','tweet','like','inline_socials'
,'stumblebadge','plusone']})
]
extra_css = '''
.author{font-size:small}
img{margin-bottom: 0.4em; display:block; margin-left:auto; margin-right: auto}
'''
preprocess_regexps = [
(re.compile(r'<img src="http://.*\.scorecardresearch\.com/'), lambda m: '')
]
feeds = [('Vice', 'http://www.vice.com/es/rss')] feeds = [('Vice', 'http://www.vice.com/es/rss')]

View File

@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class WirtualneMedia(BasicNewsRecipe):
    """Recipe for wirtualnemedia.pl, a Polish media/advertising news portal."""

    title = u'wirtualnemedia.pl'
    __author__ = 'fenuks'
    description = u'Portal o mediach, reklamie, internecie, PR, telekomunikacji - nr 1 w Polsce - WirtualneMedia.pl - wiadomości z pierwszej ręki.'
    category = 'internet'
    language = 'pl'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True

    masthead_url = 'http://i.wp.pl/a/f/jpeg/8654/wirtualnemedia.jpeg'
    cover_url = 'http://static.wirtualnemedia.pl/img/logo_wirtualnemedia_newsletter.gif'

    # Drop the site chrome; articles sit between these two containers.
    remove_tags = [dict(id=['header', 'footer'])]

    feeds = [
        (u'Gospodarka', u'http://www.wirtualnemedia.pl/rss/wm_gospodarka.xml'),
        (u'Internet', u'http://www.wirtualnemedia.pl/rss/wm_internet.xml'),
        (u'Kultura', u'http://www.wirtualnemedia.pl/rss/wm_kulturarozrywka.xml'),
        (u'Badania', u'http://www.wirtualnemedia.pl/rss/wm_marketing.xml'),
        (u'Prasa', u'http://www.wirtualnemedia.pl/rss/wm_prasa.xml'),
        (u'Radio', u'http://www.wirtualnemedia.pl/rss/wm_radio.xml'),
        (u'Reklama', u'http://www.wirtualnemedia.pl/rss/wm_reklama.xml'),
        (u'PR', u'http://www.wirtualnemedia.pl/rss/wm_relations.xml'),
        (u'Technologie', u'http://www.wirtualnemedia.pl/rss/wm_telekomunikacja.xml'),
        (u'Telewizja', u'http://www.wirtualnemedia.pl/rss/wm_telewizja_rss.xml'),
    ]

    def print_version(self, url):
        """Return the print-friendly URL (artykul -> print)."""
        return url.replace('artykul', 'print')

View File

@ -375,7 +375,6 @@ class Build(Command):
"common/common.h", "common/common.h",
"common/config_file.h", "common/config_file.h",
"style/blurhelper.h", "style/blurhelper.h",
"style/dialogpixmaps.h",
"style/fixx11h.h", "style/fixx11h.h",
"style/pixmaps.h", "style/pixmaps.h",
"style/qtcurve.h", "style/qtcurve.h",

View File

@ -8,14 +8,14 @@ msgstr ""
"Project-Id-Version: calibre\n" "Project-Id-Version: calibre\n"
"Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n" "Report-Msgid-Bugs-To: FULL NAME <EMAIL@ADDRESS>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-27 18:24+0000\n" "PO-Revision-Date: 2012-06-06 17:20+0000\n"
"Last-Translator: Jellby <Unknown>\n" "Last-Translator: Jellby <Unknown>\n"
"Language-Team: Spanish <es@li.org>\n" "Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-28 04:54+0000\n" "X-Launchpad-Export-Date: 2012-06-07 04:40+0000\n"
"X-Generator: Launchpad (build 15149)\n" "X-Generator: Launchpad (build 15353)\n"
#. name for aaa #. name for aaa
msgid "Ghotuo" msgid "Ghotuo"
@ -383,7 +383,7 @@ msgstr "Tibetano amdo"
#. name for ady #. name for ady
msgid "Adyghe" msgid "Adyghe"
msgstr "Adyghe" msgstr "Adigué"
#. name for adz #. name for adz
msgid "Adzera" msgid "Adzera"
@ -951,7 +951,7 @@ msgstr "Alune"
#. name for alq #. name for alq
msgid "Algonquin" msgid "Algonquin"
msgstr "Algonquin" msgstr "Algonquino"
#. name for alr #. name for alr
msgid "Alutor" msgid "Alutor"
@ -1451,7 +1451,7 @@ msgstr "Araona"
#. name for arp #. name for arp
msgid "Arapaho" msgid "Arapaho"
msgstr "Arapaho" msgstr "Arapa"
#. name for arq #. name for arq
msgid "Arabic; Algerian" msgid "Arabic; Algerian"
@ -4363,7 +4363,7 @@ msgstr "Jalkunan"
#. name for bxm #. name for bxm
msgid "Buriat; Mongolia" msgid "Buriat; Mongolia"
msgstr "Buriat de Mongolia" msgstr "Buriato de Mongolia"
#. name for bxn #. name for bxn
msgid "Burduna" msgid "Burduna"
@ -4383,7 +4383,7 @@ msgstr "Beele"
#. name for bxr #. name for bxr
msgid "Buriat; Russia" msgid "Buriat; Russia"
msgstr "Buriat de Rusia" msgstr "Buriato de Rusia"
#. name for bxs #. name for bxs
msgid "Busam" msgid "Busam"
@ -4391,7 +4391,7 @@ msgstr "Busam"
#. name for bxu #. name for bxu
msgid "Buriat; China" msgid "Buriat; China"
msgstr "Buriat de China" msgstr "Buriato de China"
#. name for bxv #. name for bxv
msgid "Berakou" msgid "Berakou"
@ -4999,7 +4999,7 @@ msgstr "Mari (Rusia)"
#. name for chn #. name for chn
msgid "Chinook jargon" msgid "Chinook jargon"
msgstr "Chinook" msgstr "Jerga chinook"
#. name for cho #. name for cho
msgid "Choctaw" msgid "Choctaw"
@ -6135,7 +6135,7 @@ msgstr "Slave (atabascano)"
#. name for dep #. name for dep
msgid "Delaware; Pidgin" msgid "Delaware; Pidgin"
msgstr "Delaware pidyin" msgstr "Pidyin delaware"
#. name for deq #. name for deq
msgid "Dendi (Central African Republic)" msgid "Dendi (Central African Republic)"
@ -6723,7 +6723,7 @@ msgstr "Darai"
#. name for dsb #. name for dsb
msgid "Sorbian; Lower" msgid "Sorbian; Lower"
msgstr "Sorabo inferior" msgstr "Bajo sorabo"
#. name for dse #. name for dse
msgid "Dutch Sign Language" msgid "Dutch Sign Language"
@ -7831,7 +7831,7 @@ msgstr "Gabri"
#. name for gac #. name for gac
msgid "Great Andamanese; Mixed" msgid "Great Andamanese; Mixed"
msgstr "Gran Andamanés mixto" msgstr "Gran andamanés mixto"
#. name for gad #. name for gad
msgid "Gaddang" msgid "Gaddang"
@ -8479,23 +8479,23 @@ msgstr "Bajo alemán medio"
#. name for gmm #. name for gmm
msgid "Gbaya-Mbodomo" msgid "Gbaya-Mbodomo"
msgstr "" msgstr "Gbaya-Mbodomo"
#. name for gmn #. name for gmn
msgid "Gimnime" msgid "Gimnime"
msgstr "" msgstr "Gimnime"
#. name for gmu #. name for gmu
msgid "Gumalu" msgid "Gumalu"
msgstr "" msgstr "Gumalu"
#. name for gmv #. name for gmv
msgid "Gamo" msgid "Gamo"
msgstr "" msgstr "Gamo"
#. name for gmx #. name for gmx
msgid "Magoma" msgid "Magoma"
msgstr "" msgstr "Magoma"
#. name for gmy #. name for gmy
msgid "Greek; Mycenaean" msgid "Greek; Mycenaean"
@ -8503,11 +8503,11 @@ msgstr "Griego micénico"
#. name for gna #. name for gna
msgid "Kaansa" msgid "Kaansa"
msgstr "" msgstr "Kaansa"
#. name for gnb #. name for gnb
msgid "Gangte" msgid "Gangte"
msgstr "" msgstr "Gangte"
#. name for gnc #. name for gnc
msgid "Guanche" msgid "Guanche"
@ -8515,15 +8515,15 @@ msgstr "Guanche"
#. name for gnd #. name for gnd
msgid "Zulgo-Gemzek" msgid "Zulgo-Gemzek"
msgstr "" msgstr "Zulgo-Gemzek"
#. name for gne #. name for gne
msgid "Ganang" msgid "Ganang"
msgstr "" msgstr "Ganang"
#. name for gng #. name for gng
msgid "Ngangam" msgid "Ngangam"
msgstr "" msgstr "Ngangam"
#. name for gnh #. name for gnh
msgid "Lere" msgid "Lere"
@ -8535,7 +8535,7 @@ msgstr ""
#. name for gnk #. name for gnk
msgid "//Gana" msgid "//Gana"
msgstr "" msgstr "//Gana"
#. name for gnl #. name for gnl
msgid "Gangulu" msgid "Gangulu"
@ -8555,7 +8555,7 @@ msgstr "Gondi septentrional"
#. name for gnq #. name for gnq
msgid "Gana" msgid "Gana"
msgstr "" msgstr "Gana"
#. name for gnr #. name for gnr
msgid "Gureng Gureng" msgid "Gureng Gureng"
@ -8563,11 +8563,11 @@ msgstr ""
#. name for gnt #. name for gnt
msgid "Guntai" msgid "Guntai"
msgstr "" msgstr "Guntai"
#. name for gnu #. name for gnu
msgid "Gnau" msgid "Gnau"
msgstr "" msgstr "Gnau"
#. name for gnw #. name for gnw
msgid "Guaraní; Western Bolivian" msgid "Guaraní; Western Bolivian"
@ -8575,35 +8575,35 @@ msgstr "Guaraní boliviano occidental"
#. name for gnz #. name for gnz
msgid "Ganzi" msgid "Ganzi"
msgstr "" msgstr "Ganzi"
#. name for goa #. name for goa
msgid "Guro" msgid "Guro"
msgstr "" msgstr "Guro"
#. name for gob #. name for gob
msgid "Playero" msgid "Playero"
msgstr "" msgstr "Playero"
#. name for goc #. name for goc
msgid "Gorakor" msgid "Gorakor"
msgstr "" msgstr "Gorakor"
#. name for god #. name for god
msgid "Godié" msgid "Godié"
msgstr "" msgstr "Godié"
#. name for goe #. name for goe
msgid "Gongduk" msgid "Gongduk"
msgstr "" msgstr "Gongduk"
#. name for gof #. name for gof
msgid "Gofa" msgid "Gofa"
msgstr "" msgstr "Gofa"
#. name for gog #. name for gog
msgid "Gogo" msgid "Gogo"
msgstr "" msgstr "Gogo"
#. name for goh #. name for goh
msgid "German; Old High (ca. 750-1050)" msgid "German; Old High (ca. 750-1050)"
@ -8611,19 +8611,19 @@ msgstr "Alto alemán antiguo (ca. 750-1050)"
#. name for goi #. name for goi
msgid "Gobasi" msgid "Gobasi"
msgstr "" msgstr "Gobasi"
#. name for goj #. name for goj
msgid "Gowlan" msgid "Gowlan"
msgstr "" msgstr "Gowlan"
#. name for gok #. name for gok
msgid "Gowli" msgid "Gowli"
msgstr "" msgstr "Gowli"
#. name for gol #. name for gol
msgid "Gola" msgid "Gola"
msgstr "" msgstr "Gola"
#. name for gom #. name for gom
msgid "Konkani; Goan" msgid "Konkani; Goan"
@ -8635,15 +8635,15 @@ msgstr "Gondi"
#. name for goo #. name for goo
msgid "Gone Dau" msgid "Gone Dau"
msgstr "" msgstr "Gone Dau"
#. name for gop #. name for gop
msgid "Yeretuar" msgid "Yeretuar"
msgstr "" msgstr "Yeretuar"
#. name for goq #. name for goq
msgid "Gorap" msgid "Gorap"
msgstr "" msgstr "Gorap"
#. name for gor #. name for gor
msgid "Gorontalo" msgid "Gorontalo"
@ -8651,7 +8651,7 @@ msgstr "Gorontalo"
#. name for gos #. name for gos
msgid "Gronings" msgid "Gronings"
msgstr "" msgstr "Gronings"
#. name for got #. name for got
msgid "Gothic" msgid "Gothic"
@ -8659,15 +8659,15 @@ msgstr "Gótico"
#. name for gou #. name for gou
msgid "Gavar" msgid "Gavar"
msgstr "" msgstr "Gavar"
#. name for gow #. name for gow
msgid "Gorowa" msgid "Gorowa"
msgstr "" msgstr "Gorowa"
#. name for gox #. name for gox
msgid "Gobu" msgid "Gobu"
msgstr "" msgstr "Gobu"
#. name for goy #. name for goy
msgid "Goundo" msgid "Goundo"
@ -9683,7 +9683,7 @@ msgstr ""
#. name for hsb #. name for hsb
msgid "Sorbian; Upper" msgid "Sorbian; Upper"
msgstr "" msgstr "Alto sorabo"
#. name for hsh #. name for hsh
msgid "Hungarian Sign Language" msgid "Hungarian Sign Language"
@ -19291,7 +19291,7 @@ msgstr ""
#. name for nwc #. name for nwc
msgid "Newari; Old" msgid "Newari; Old"
msgstr "Newari antiguo" msgstr "Newarí antiguo"
#. name for nwe #. name for nwe
msgid "Ngwe" msgid "Ngwe"
@ -19311,7 +19311,7 @@ msgstr ""
#. name for nwx #. name for nwx
msgid "Newar; Middle" msgid "Newar; Middle"
msgstr "Newari medio" msgstr "Newarí medio"
#. name for nwy #. name for nwy
msgid "Nottoway-Meherrin" msgid "Nottoway-Meherrin"
@ -23027,7 +23027,7 @@ msgstr ""
#. name for sia #. name for sia
msgid "Sami; Akkala" msgid "Sami; Akkala"
msgstr "" msgstr "Sami de Akkala"
#. name for sib #. name for sib
msgid "Sebop" msgid "Sebop"
@ -23127,11 +23127,11 @@ msgstr ""
#. name for sjd #. name for sjd
msgid "Sami; Kildin" msgid "Sami; Kildin"
msgstr "" msgstr "Sami de Kildin"
#. name for sje #. name for sje
msgid "Sami; Pite" msgid "Sami; Pite"
msgstr "" msgstr "Sami de Pite"
#. name for sjg #. name for sjg
msgid "Assangori" msgid "Assangori"
@ -23139,7 +23139,7 @@ msgstr ""
#. name for sjk #. name for sjk
msgid "Sami; Kemi" msgid "Sami; Kemi"
msgstr "" msgstr "Sami de Kemi"
#. name for sjl #. name for sjl
msgid "Sajalong" msgid "Sajalong"
@ -23171,11 +23171,11 @@ msgstr ""
#. name for sjt #. name for sjt
msgid "Sami; Ter" msgid "Sami; Ter"
msgstr "" msgstr "Sami de Ter"
#. name for sju #. name for sju
msgid "Sami; Ume" msgid "Sami; Ume"
msgstr "" msgstr "Sami de Ume"
#. name for sjw #. name for sjw
msgid "Shawnee" msgid "Shawnee"
@ -23407,7 +23407,7 @@ msgstr ""
#. name for smj #. name for smj
msgid "Lule Sami" msgid "Lule Sami"
msgstr "Sami lule" msgstr "Sami de Lule"
#. name for smk #. name for smk
msgid "Bolinao" msgid "Bolinao"
@ -23423,7 +23423,7 @@ msgstr ""
#. name for smn #. name for smn
msgid "Sami; Inari" msgid "Sami; Inari"
msgstr "" msgstr "Sami de Inari"
#. name for smo #. name for smo
msgid "Samoan" msgid "Samoan"
@ -23443,7 +23443,7 @@ msgstr ""
#. name for sms #. name for sms
msgid "Sami; Skolt" msgid "Sami; Skolt"
msgstr "" msgstr "Sami de Skolt"
#. name for smt #. name for smt
msgid "Simte" msgid "Simte"
@ -24339,7 +24339,7 @@ msgstr "Subanen central"
#. name for syc #. name for syc
msgid "Syriac; Classical" msgid "Syriac; Classical"
msgstr "" msgstr "Siríaco clásico"
#. name for syi #. name for syi
msgid "Seki" msgid "Seki"
@ -28235,7 +28235,7 @@ msgstr ""
#. name for xal #. name for xal
msgid "Kalmyk" msgid "Kalmyk"
msgstr "" msgstr "Calmuco"
#. name for xam #. name for xam
msgid "/Xam" msgid "/Xam"

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 54) numeric_version = (0, 8, 55)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -276,6 +276,16 @@ class ODTMetadataReader(MetadataReaderPlugin):
from calibre.ebooks.metadata.odt import get_metadata from calibre.ebooks.metadata.odt import get_metadata
return get_metadata(stream) return get_metadata(stream)
class DocXMetadataReader(MetadataReaderPlugin):
name = 'Read DOCX metadata'
file_types = set(['docx'])
description = _('Read metadata from %s files')%'DOCX'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.docx import get_metadata
return get_metadata(stream)
class OPFMetadataReader(MetadataReaderPlugin): class OPFMetadataReader(MetadataReaderPlugin):
name = 'Read OPF metadata' name = 'Read OPF metadata'

View File

@ -672,6 +672,12 @@ class KindleFireOutput(KindleDXOutput):
dpi = 169.0 dpi = 169.0
comic_screen_size = (570, 1016) comic_screen_size = (570, 1016)
@classmethod
def tags_to_string(cls, tags):
# The idiotic fire doesn't obey the color:white directive
from xml.sax.saxutils import escape
return escape(', '.join(tags))
class IlliadOutput(OutputProfile): class IlliadOutput(OutputProfile):
name = 'Illiad' name = 'Illiad'

View File

@ -72,6 +72,7 @@ class ANDROID(USBMS):
# Sony Ericsson # Sony Ericsson
0xfce : { 0xfce : {
0xd12e : [0x0100], 0xd12e : [0x0100],
0xe156 : [0x226],
0xe15d : [0x226], 0xe15d : [0x226],
0xe14f : [0x0226], 0xe14f : [0x0226],
0x614f : [0x0226, 0x100], 0x614f : [0x0226, 0x100],

View File

@ -178,7 +178,13 @@ class DevicePlugin(Plugin):
if cvid == vid: if cvid == vid:
if pid in products: if pid in products:
if hasattr(self.VENDOR_ID, 'keys'): if hasattr(self.VENDOR_ID, 'keys'):
cbcd = self.VENDOR_ID[vid][pid] try:
cbcd = self.VENDOR_ID[vid][pid]
except KeyError:
# Vendor vid does not have product pid, pid
# exists for some other vendor in this
# device
continue
else: else:
cbcd = self.BCD cbcd = self.BCD
if self.test_bcd(bcd, cbcd): if self.test_bcd(bcd, cbcd):

View File

@ -224,16 +224,18 @@ class TREKSTOR(USBMS):
FORMATS = ['epub', 'txt', 'pdf'] FORMATS = ['epub', 'txt', 'pdf']
VENDOR_ID = [0x1e68] VENDOR_ID = [0x1e68]
PRODUCT_ID = [0x0041, 0x0042, 0x0052, 0x004e, PRODUCT_ID = [0x0041, 0x0042, 0x0052, 0x004e, 0x0056,
0x003e # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091 0x003e, # This is for the EBOOK_PLAYER_5M https://bugs.launchpad.net/bugs/792091
] ]
BCD = [0x0002] BCD = [0x0002, 0x100]
EBOOK_DIR_MAIN = 'Ebooks' EBOOK_DIR_MAIN = 'Ebooks'
VENDOR_NAME = 'TREKSTOR' VENDOR_NAME = 'TREKSTOR'
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7', WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['EBOOK_PLAYER_7',
'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0'] 'EBOOK_PLAYER_5M', 'EBOOK-READER_3.0', 'EREADER_PYRUS']
SUPPORTS_SUB_DIRS = True
SUPPORTS_SUB_DIRS_DEFAULT = False
class EEEREADER(USBMS): class EEEREADER(USBMS):

View File

@ -268,20 +268,92 @@ class PRST1(USBMS):
collections = booklist.get_collections(collections_attributes) collections = booklist.get_collections(collections_attributes)
with closing(sqlite.connect(dbpath)) as connection: with closing(sqlite.connect(dbpath)) as connection:
self.remove_orphaned_records(connection, dbpath)
self.update_device_books(connection, booklist, source_id, self.update_device_books(connection, booklist, source_id,
plugboard, dbpath) plugboard, dbpath)
self.update_device_collections(connection, booklist, collections, source_id) self.update_device_collections(connection, booklist, collections, source_id, dbpath)
debug_print('PRST1: finished update_device_database') debug_print('PRST1: finished update_device_database')
def update_device_books(self, connection, booklist, source_id, plugboard, def remove_orphaned_records(self, connection, dbpath):
dbpath):
from sqlite3 import DatabaseError from sqlite3 import DatabaseError
opts = self.settings() try:
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS] cursor = connection.cursor()
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
use_sony_authors = opts.extra_customization[self.OPT_USE_SONY_AUTHORS] debug_print("Removing Orphaned Collection Records")
# Purge any collections references that point into the abyss
query = 'DELETE FROM collections WHERE content_id NOT IN (SELECT _id FROM books)'
cursor.execute(query)
query = 'DELETE FROM collections WHERE collection_id NOT IN (SELECT _id FROM collection)'
cursor.execute(query)
debug_print("Removing Orphaned Book Records")
# Purge any references to books not in this database
# Idea is to prevent any spill-over where these wind up applying to some other book
query = 'DELETE FROM %s WHERE content_id NOT IN (SELECT _id FROM books)'
cursor.execute(query%'annotation')
cursor.execute(query%'bookmark')
cursor.execute(query%'current_position')
cursor.execute(query%'freehand')
cursor.execute(query%'history')
cursor.execute(query%'layout_cache')
cursor.execute(query%'preference')
cursor.close()
except DatabaseError:
import traceback
tb = traceback.format_exc()
raise DeviceError((('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect '
' reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that '
' deleting this file will cause your reader to forget '
' any notes/highlights, etc.')%dbpath)+' Underlying error:'
'\n'+tb)
def get_lastrowid(self, cursor):
# SQLite3 + Python has a fun issue on 32-bit systems with integer overflows.
# Issue a SQL query instead, getting the value as a string, and then converting to a long python int manually.
query = 'SELECT last_insert_rowid()'
cursor.execute(query)
row = cursor.fetchone()
return long(row[0])
def get_database_min_id(self, source_id):
sequence_min = 0L
if source_id == 1:
sequence_min = 4294967296L
return sequence_min
def set_database_sequence_id(self, connection, table, sequence_id):
cursor = connection.cursor()
# Update the sequence Id if it exists
query = 'UPDATE sqlite_sequence SET seq = ? WHERE name = ?'
t = (sequence_id, table,)
cursor.execute(query, t)
# Insert the sequence Id if it doesn't
query = ('INSERT INTO sqlite_sequence (name, seq) '
'SELECT ?, ? '
'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)');
cursor.execute(query, (table, sequence_id, table,))
cursor.close()
def read_device_books(self, connection, source_id, dbpath):
from sqlite3 import DatabaseError
sequence_min = self.get_database_min_id(source_id)
sequence_max = sequence_min
sequence_dirty = 0
debug_print("Book Sequence Min: %d, Source Id: %d"%(sequence_min,source_id))
try: try:
cursor = connection.cursor() cursor = connection.cursor()
@ -300,27 +372,70 @@ class PRST1(USBMS):
' any notes/highlights, etc.')%dbpath)+' Underlying error:' ' any notes/highlights, etc.')%dbpath)+' Underlying error:'
'\n'+tb) '\n'+tb)
# Get the books themselves, but keep track of any that are less than the minimum.
# Record what the max id being used is as well.
db_books = {} db_books = {}
for i, row in enumerate(cursor): for i, row in enumerate(cursor):
lpath = row[0].replace('\\', '/') lpath = row[0].replace('\\', '/')
db_books[lpath] = row[1] db_books[lpath] = row[1]
if row[1] < sequence_min:
sequence_dirty = 1
else:
sequence_max = max(sequence_max, row[1])
# Work-around for Sony Bug (SD Card DB not using right SQLite sequence) # If the database is 'dirty', then we should fix up the Ids and the sequence number
if source_id == 1: if sequence_dirty == 1:
# Update any existing sequence numbers in the table that aren't in the required range debug_print("Book Sequence Dirty for Source Id: %d"%source_id)
sdcard_sequence_start = '4294967296' sequence_max = sequence_max + 1
query = 'UPDATE sqlite_sequence SET seq = ? WHERE seq < ?' for book, bookId in db_books.items():
t = (sdcard_sequence_start, sdcard_sequence_start,) if bookId < sequence_min:
cursor.execute(query, t) # Record the new Id and write it to the DB
db_books[book] = sequence_max
sequence_max = sequence_max + 1
# Insert sequence numbers for tables we will be manipulating, if they don't already exist # Fix the Books DB
query = ('INSERT INTO sqlite_sequence (name, seq) ' query = 'UPDATE books SET _id = ? WHERE file_path = ?'
'SELECT ?, ? ' t = (db_books[book], book,)
'WHERE NOT EXISTS (SELECT 1 FROM sqlite_sequence WHERE name = ?)'); cursor.execute(query, t)
cursor.execute(query, ('books',sdcard_sequence_start,'books',))
cursor.execute(query, ('collection',sdcard_sequence_start,'collection',)) # Fix any references so that they point back to the right book
cursor.execute(query, ('collections',sdcard_sequence_start,'collections',)) t = (db_books[book], bookId,)
query = 'UPDATE collections SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE annotation SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE bookmark SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE current_position SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE deleted_markups SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE dic_histories SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE freehand SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE history SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE layout_cache SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
query = 'UPDATE preference SET content_id = ? WHERE content_id = ?'
cursor.execute(query, t)
self.set_database_sequence_id(connection, 'books', sequence_max)
debug_print("Book Sequence Max: %d, Source Id: %d"%(sequence_max,source_id))
cursor.close()
return db_books
def update_device_books(self, connection, booklist, source_id, plugboard,
dbpath):
opts = self.settings()
upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
use_sony_authors = opts.extra_customization[self.OPT_USE_SONY_AUTHORS]
db_books = self.read_device_books(connection, source_id, dbpath)
cursor = connection.cursor()
for book in booklist: for book in booklist:
# Run through plugboard if needed # Run through plugboard if needed
@ -365,10 +480,10 @@ class PRST1(USBMS):
modified_date, lpath, modified_date, lpath,
os.path.basename(lpath), book.size, book.mime) os.path.basename(lpath), book.size, book.mime)
cursor.execute(query, t) cursor.execute(query, t)
book.bookId = cursor.lastrowid book.bookId = self.get_lastrowid(cursor)
if upload_covers: if upload_covers:
self.upload_book_cover(connection, book, source_id) self.upload_book_cover(connection, book, source_id)
debug_print('Inserted New Book: ' + book.title) debug_print('Inserted New Book: (%u) '%book.bookId + book.title)
else: else:
query = ''' query = '''
UPDATE books UPDATE books
@ -400,26 +515,111 @@ class PRST1(USBMS):
connection.commit() connection.commit()
cursor.close() cursor.close()
def update_device_collections(self, connection, booklist, collections, def read_device_collections(self, connection, source_id, dbpath):
source_id): from sqlite3 import DatabaseError
cursor = connection.cursor()
sequence_min = self.get_database_min_id(source_id)
sequence_max = sequence_min
sequence_dirty = 0
debug_print("Collection Sequence Min: %d, Source Id: %d"%(sequence_min,source_id))
try:
cursor = connection.cursor()
if collections:
# Get existing collections # Get existing collections
query = 'SELECT _id, title FROM collection' query = 'SELECT _id, title FROM collection'
cursor.execute(query) cursor.execute(query)
except DatabaseError:
import traceback
tb = traceback.format_exc()
raise DeviceError((('The SONY database is corrupted. '
' Delete the file %s on your reader and then disconnect '
' reconnect it. If you are using an SD card, you '
' should delete the file on the card as well. Note that '
' deleting this file will cause your reader to forget '
' any notes/highlights, etc.')%dbpath)+' Underlying error:'
'\n'+tb)
db_collections = {} db_collections = {}
for i, row in enumerate(cursor): for i, row in enumerate(cursor):
db_collections[row[1]] = row[0] db_collections[row[1]] = row[0]
if row[0] < sequence_min:
sequence_dirty = 1
else:
sequence_max = max(sequence_max, row[0])
# If the database is 'dirty', then we should fix up the Ids and the sequence number
if sequence_dirty == 1:
debug_print("Collection Sequence Dirty for Source Id: %d"%source_id)
sequence_max = sequence_max + 1
for collection, collectionId in db_collections.items():
if collectionId < sequence_min:
# Record the new Id and write it to the DB
db_collections[collection] = sequence_max
sequence_max = sequence_max + 1
# Fix the collection DB
query = 'UPDATE collection SET _id = ? WHERE title = ?'
t = (db_collections[collection], collection, )
cursor.execute(query, t)
# Fix any references in existing collections
query = 'UPDATE collections SET collection_id = ? WHERE collection_id = ?'
t = (db_collections[collection], collectionId,)
cursor.execute(query, t)
self.set_database_sequence_id(connection, 'collection', sequence_max)
debug_print("Collection Sequence Max: %d, Source Id: %d"%(sequence_max,source_id))
# Fix up the collections table now...
sequence_dirty = 0
sequence_max = sequence_min
debug_print("Collections Sequence Min: %d, Source Id: %d"%(sequence_min,source_id))
query = 'SELECT _id FROM collections'
cursor.execute(query)
db_collection_pairs = []
for i, row in enumerate(cursor):
db_collection_pairs.append(row[0])
if row[0] < sequence_min:
sequence_dirty = 1
else:
sequence_max = max(sequence_max, row[0])
if sequence_dirty == 1:
debug_print("Collections Sequence Dirty for Source Id: %d"%source_id)
sequence_max = sequence_max + 1
for pairId in db_collection_pairs:
if pairId < sequence_min:
# Record the new Id and write it to the DB
query = 'UPDATE collections SET _id = ? WHERE _id = ?'
t = (sequence_max, pairId,)
cursor.execute(query, t)
sequence_max = sequence_max + 1
self.set_database_sequence_id(connection, 'collections', sequence_max)
debug_print("Collections Sequence Max: %d, Source Id: %d"%(sequence_max,source_id))
cursor.close()
return db_collections
def update_device_collections(self, connection, booklist, collections,
source_id, dbpath):
if collections:
db_collections = self.read_device_collections(connection, source_id, dbpath)
cursor = connection.cursor()
for collection, books in collections.items(): for collection, books in collections.items():
if collection not in db_collections: if collection not in db_collections:
query = 'INSERT INTO collection (title, source_id) VALUES (?,?)' query = 'INSERT INTO collection (title, source_id) VALUES (?,?)'
t = (collection, source_id) t = (collection, source_id)
cursor.execute(query, t) cursor.execute(query, t)
db_collections[collection] = cursor.lastrowid db_collections[collection] = self.get_lastrowid(cursor)
debug_print('Inserted New Collection: ' + collection) debug_print('Inserted New Collection: (%u) '%db_collections[collection] + collection)
# Get existing books in collection # Get existing books in collection
query = ''' query = '''
@ -483,9 +683,8 @@ class PRST1(USBMS):
cursor.execute(query, t) cursor.execute(query, t)
debug_print('Deleted Collection: ' + collection) debug_print('Deleted Collection: ' + collection)
connection.commit()
connection.commit() cursor.close()
cursor.close()
def rebuild_collections(self, booklist, oncard): def rebuild_collections(self, booklist, oncard):
debug_print('PRST1: starting rebuild_collections') debug_print('PRST1: starting rebuild_collections')

View File

@ -30,6 +30,8 @@ class DeviceConfig(object):
SUPPORTS_SUB_DIRS = False SUPPORTS_SUB_DIRS = False
SUPPORTS_SUB_DIRS_FOR_SCAN = False # This setting is used when scanning for SUPPORTS_SUB_DIRS_FOR_SCAN = False # This setting is used when scanning for
# books when SUPPORTS_SUB_DIRS is False # books when SUPPORTS_SUB_DIRS is False
SUPPORTS_SUB_DIRS_DEFAULT = True
MUST_READ_METADATA = False MUST_READ_METADATA = False
SUPPORTS_USE_AUTHOR_SORT = False SUPPORTS_USE_AUTHOR_SORT = False
@ -57,7 +59,7 @@ class DeviceConfig(object):
c = Config('device_drivers_%s' % name, _('settings for device drivers')) c = Config('device_drivers_%s' % name, _('settings for device drivers'))
c.add_opt('format_map', default=cls.FORMATS, c.add_opt('format_map', default=cls.FORMATS,
help=_('Ordered list of formats the device will accept')) help=_('Ordered list of formats the device will accept'))
c.add_opt('use_subdirs', default=True, c.add_opt('use_subdirs', default=cls.SUPPORTS_SUB_DIRS_DEFAULT,
help=_('Place files in sub directories if the device supports them')) help=_('Place files in sub directories if the device supports them'))
c.add_opt('read_metadata', default=True, c.add_opt('read_metadata', default=True,
help=_('Read metadata from files on device')) help=_('Read metadata from files on device'))

View File

@ -382,7 +382,8 @@ class USBMS(CLI, Device):
os.makedirs(self.normalize_path(self._main_prefix)) os.makedirs(self.normalize_path(self._main_prefix))
def write_prefix(prefix, listid): def write_prefix(prefix, listid):
if prefix is not None and isinstance(booklists[listid], self.booklist_class): if (prefix is not None and len(booklists) > listid and
isinstance(booklists[listid], self.booklist_class)):
if not os.path.exists(prefix): if not os.path.exists(prefix):
os.makedirs(self.normalize_path(prefix)) os.makedirs(self.normalize_path(prefix))
with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f: with open(self.normalize_path(os.path.join(prefix, self.METADATA_CACHE)), 'wb') as f:

View File

@ -8,6 +8,8 @@ from itertools import cycle
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
class EPUBInput(InputFormatPlugin): class EPUBInput(InputFormatPlugin):
name = 'EPUB Input' name = 'EPUB Input'
@ -18,18 +20,24 @@ class EPUBInput(InputFormatPlugin):
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)]) recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
def decrypt_font(self, key, path): def decrypt_font(self, key, path, algorithm):
raw = open(path, 'rb').read() is_adobe = algorithm == ADOBE_OBFUSCATION
crypt = raw[:1024] crypt_len = 1024 if is_adobe else 1040
key = cycle(iter(key)) with open(path, 'rb') as f:
decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt]) raw = f.read()
crypt = bytearray(raw[:crypt_len])
key = cycle(iter(bytearray(key)))
decrypt = bytes(bytearray(x^key.next() for x in crypt))
with open(path, 'wb') as f: with open(path, 'wb') as f:
f.write(decrypt) f.write(decrypt)
f.write(raw[1024:]) f.write(raw[crypt_len:])
def process_encryption(self, encfile, opf, log): def process_encryption(self, encfile, opf, log):
from lxml import etree from lxml import etree
import uuid import uuid, hashlib
idpf_key = opf.unique_identifier
if idpf_key:
idpf_key = hashlib.sha1(idpf_key).digest()
key = None key = None
for item in opf.identifier_iter(): for item in opf.identifier_iter():
scheme = None scheme = None
@ -39,8 +47,8 @@ class EPUBInput(InputFormatPlugin):
if (scheme and scheme.lower() == 'uuid') or \ if (scheme and scheme.lower() == 'uuid') or \
(item.text and item.text.startswith('urn:uuid:')): (item.text and item.text.startswith('urn:uuid:')):
try: try:
key = str(item.text).rpartition(':')[-1] key = bytes(item.text).rpartition(':')[-1]
key = list(map(ord, uuid.UUID(key).bytes)) key = uuid.UUID(key).bytes
except: except:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
@ -50,14 +58,16 @@ class EPUBInput(InputFormatPlugin):
root = etree.parse(encfile) root = etree.parse(encfile)
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'): for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
algorithm = em.get('Algorithm', '') algorithm = em.get('Algorithm', '')
if algorithm != 'http://ns.adobe.com/pdf/enc#RC': if algorithm not in {ADOBE_OBFUSCATION,
'http://www.idpf.org/2008/embedding'}:
return False return False
cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0] cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
uri = cr.get('URI') uri = cr.get('URI')
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/'))) path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
if key is not None and os.path.exists(path): tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
if (tkey and os.path.exists(path)):
self._encrypted_font_uris.append(uri) self._encrypted_font_uris.append(uri)
self.decrypt_font(key, path) self.decrypt_font(tkey, path, algorithm)
return True return True
except: except:
import traceback import traceback

View File

@ -223,6 +223,8 @@ class MOBIOutput(OutputFormatPlugin):
else: else:
# Add rasterized SVG images # Add rasterized SVG images
resources.add_extra_images() resources.add_extra_images()
if hasattr(self.oeb, 'inserted_metadata_jacket'):
self.workaround_fire_bugs(self.oeb.inserted_metadata_jacket)
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables) mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts) mobimlizer(oeb, opts)
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
@ -236,6 +238,18 @@ class MOBIOutput(OutputFormatPlugin):
from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup
CSSCleanup(log, opts)(item, stylizer) CSSCleanup(log, opts)(item, stylizer)
def workaround_fire_bugs(self, jacket):
# The idiotic Fire crashes when trying to render the table used to
# layout the jacket
from calibre.ebooks.oeb.base import XHTML
for table in jacket.data.xpath('//*[local-name()="table"]'):
table.tag = XHTML('div')
for tr in table.xpath('descendant::*[local-name()="tr"]'):
cols = tr.xpath('descendant::*[local-name()="td"]')
tr.tag = XHTML('div')
for td in cols:
td.tag = XHTML('span' if cols else 'div')
class AZW3Output(OutputFormatPlugin): class AZW3Output(OutputFormatPlugin):
name = 'AZW3 Output' name = 'AZW3 Output'

View File

@ -0,0 +1,89 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from lxml import etree
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.zipfile import ZipFile
from calibre.utils.magick.draw import identify_data
from calibre.ebooks.oeb.base import DC11_NS
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
NSMAP = {'dc':DC11_NS,
'cp':'http://schemas.openxmlformats.org/package/2006/metadata/core-properties'}
def XPath(expr):
return etree.XPath(expr, namespaces=NSMAP)
def _read_doc_props(raw, mi):
    """Populate *mi* (a Metadata object) from the raw XML bytes of a
    DOCX docProps/core.xml part: title, tags, authors and comments."""
    from calibre.ebooks.metadata import string_to_authors
    root = etree.fromstring(raw, parser=RECOVER_PARSER)

    # Title: the first non-blank dc:title wins
    title_elems = XPath('//dc:title')(root)
    if title_elems:
        text = title_elems[0].text
        if text and text.strip():
            mi.title = text.strip()

    # Tags come from dc:subject elements plus the whitespace/comma
    # separated cp:keywords field. Commas inside a subject are mapped
    # to underscores so the tag survives comma-separated storage.
    collected = []
    for subject in XPath('//dc:subject')(root):
        if subject.text and subject.text.strip():
            collected.append(subject.text.strip().replace(',', '_'))
    for keywords in XPath('//cp:keywords')(root):
        if keywords.text and keywords.text.strip():
            for chunk in keywords.text.split():
                collected.extend(part.strip() for part in chunk.split(','))
    if collected:
        mi.tags = collected

    # Authors: every non-blank dc:creator, split via calibre's
    # author-string parser
    author_list = []
    for creator in XPath('//dc:creator')(root):
        if creator.text and creator.text.strip():
            author_list.extend(string_to_authors(creator.text))
    if author_list:
        mi.authors = author_list

    # Comments: the text content of the first dc:description, if any
    descriptions = XPath('//dc:description')(root)
    if descriptions:
        mi.comments = etree.tostring(descriptions[0], method='text',
                encoding=unicode)
def _read_app_props(raw, mi):
    """Read a DOCX docProps/app.xml part: the Company field, when
    present and non-blank, becomes the publisher on *mi*."""
    root = etree.fromstring(raw, parser=RECOVER_PARSER)
    matches = root.xpath('//*[local-name()="Company"]')
    if matches:
        text = matches[0].text
        if text and text.strip():
            mi.publisher = text.strip()
def get_metadata(stream):
    """Return a Metadata object for the DOCX file open in *stream*.

    Title/authors/tags/comments are read from docProps/core.xml and
    the publisher from docProps/app.xml. The first embedded raster
    image with roughly cover-like proportions (aspect ratio 0.8-1.8,
    area >= 12000 px) is used as the cover.
    """
    with ZipFile(stream, 'r') as zf:
        mi = Metadata(_('Unknown'))
        cdata = None
        for zi in zf.infolist():
            ext = zi.filename.rpartition('.')[-1].lower()
            if zi.filename.lower() == 'docprops/core.xml':
                _read_doc_props(zf.read(zi), mi)
            elif zi.filename.lower() == 'docprops/app.xml':
                _read_app_props(zf.read(zi), mi)
            elif cdata is None and ext in {'jpeg', 'jpg', 'png', 'gif'}:
                raw = zf.read(zi)
                try:
                    width, height, fmt = identify_data(raw)
                except:
                    continue
                # Fix: a corrupt image reporting a zero/negative width
                # previously raised an uncaught ZeroDivisionError here,
                # since the division happens outside the try block.
                if width <= 0 or height <= 0:
                    continue
                if 0.8 <= height/width <= 1.8 and height*width >= 12000:
                    cdata = (fmt, raw)
        if cdata is not None:
            mi.cover_data = cdata
    return mi
if __name__ == '__main__':
    # Quick manual check: print the metadata of the DOCX file given
    # on the command line
    import sys
    with open(sys.argv[-1], 'rb') as stream:
        print(get_metadata(stream))

View File

@ -991,6 +991,21 @@ class OPF(object): # {{{
for item in self.identifier_path(self.metadata): for item in self.identifier_path(self.metadata):
yield item yield item
@property
def unique_identifier(self):
    # The package element's unique-identifier attribute names the id
    # of the element holding the book's unique identifier. Return that
    # element's text with any scheme prefix (e.g. 'urn:uuid:')
    # stripped, or None when it cannot be located.
    ident_id = None
    for attr in self.root.attrib:
        if attr.endswith('unique-identifier'):
            ident_id = self.root.attrib[attr]
            break
    if not ident_id:
        return None
    for elem in self.root.xpath('//*[@id=%r]' % ident_id):
        text = elem.text
        if text:
            return text.rpartition(':')[-1]
    return None
def guess_cover(self): def guess_cover(self):
''' '''
Try to guess a cover. Needed for some old/badly formed OPF files. Try to guess a cover. Needed for some old/badly formed OPF files.

View File

@ -338,8 +338,15 @@ class OEBReader(object):
href = elem.get('href') href = elem.get('href')
path = urlnormalize(urldefrag(href)[0]) path = urlnormalize(urldefrag(href)[0])
if path not in manifest.hrefs: if path not in manifest.hrefs:
self.logger.warn(u'Guide reference %r not found' % href) corrected_href = None
continue for href in manifest.hrefs:
if href.lower() == path.lower():
corrected_href = href
break
if corrected_href is None:
self.logger.warn(u'Guide reference %r not found' % href)
continue
href = corrected_href
guide.add(elem.get('type'), elem.get('title'), href) guide.add(elem.get('type'), elem.get('title'), href)
def _find_ncx(self, opf): def _find_ncx(self, opf):

View File

@ -15,10 +15,10 @@ class Clean(object):
if 'cover' not in self.oeb.guide: if 'cover' not in self.oeb.guide:
covers = [] covers = []
for x in ('other.ms-coverimage-standard', for x in ('other.ms-coverimage-standard', 'coverimagestandard',
'other.ms-titleimage-standard', 'other.ms-titleimage', 'other.ms-titleimage-standard', 'other.ms-titleimage',
'other.ms-coverimage', 'other.ms-thumbimage-standard', 'other.ms-coverimage', 'other.ms-thumbimage-standard',
'other.ms-thumbimage'): 'other.ms-thumbimage', 'thumbimagestandard'):
if x in self.oeb.guide: if x in self.oeb.guide:
href = self.oeb.guide[x].href href = self.oeb.guide[x].href
item = self.oeb.manifest.hrefs[href] item = self.oeb.manifest.hrefs[href]

View File

@ -72,6 +72,7 @@ class Jacket(object):
item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
self.oeb.spine.insert(0, item, True) self.oeb.spine.insert(0, item, True)
self.oeb.inserted_metadata_jacket = item
def remove_existing_jacket(self): def remove_existing_jacket(self):
for x in self.oeb.spine[:4]: for x in self.oeb.spine[:4]:

View File

@ -46,6 +46,7 @@ class SVGRasterizer(object):
def __call__(self, oeb, context): def __call__(self, oeb, context):
oeb.logger.info('Rasterizing SVG images...') oeb.logger.info('Rasterizing SVG images...')
self.stylizer_cache = {}
self.oeb = oeb self.oeb = oeb
self.opts = context self.opts = context
self.profile = context.dest self.profile = context.dest
@ -116,29 +117,35 @@ class SVGRasterizer(object):
elem.attrib[XLINK('href')] = data elem.attrib[XLINK('href')] = data
return svg return svg
def stylizer(self, item):
ans = self.stylizer_cache.get(item, None)
if ans is None:
ans = Stylizer(item.data, item.href, self.oeb, self.opts,
self.profile)
self.stylizer_cache[item] = ans
return ans
def rasterize_spine(self): def rasterize_spine(self):
for item in self.oeb.spine: for item in self.oeb.spine:
html = item.data self.rasterize_item(item)
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
self.rasterize_item(item, stylizer)
def rasterize_item(self, item, stylizer): def rasterize_item(self, item):
html = item.data html = item.data
hrefs = self.oeb.manifest.hrefs hrefs = self.oeb.manifest.hrefs
for elem in xpath(html, '//h:img[@src]'): for elem in xpath(html, '//h:img[@src]'):
src = urlnormalize(elem.attrib['src']) src = urlnormalize(elem.attrib['src'])
image = hrefs.get(item.abshref(src), None) image = hrefs.get(item.abshref(src), None)
if image and image.media_type == SVG_MIME: if image and image.media_type == SVG_MIME:
style = stylizer.style(elem) style = self.stylizer(item).style(elem)
self.rasterize_external(elem, style, item, image) self.rasterize_external(elem, style, item, image)
for elem in xpath(html, '//h:object[@type="%s" and @data]' % SVG_MIME): for elem in xpath(html, '//h:object[@type="%s" and @data]' % SVG_MIME):
data = urlnormalize(elem.attrib['data']) data = urlnormalize(elem.attrib['data'])
image = hrefs.get(item.abshref(data), None) image = hrefs.get(item.abshref(data), None)
if image and image.media_type == SVG_MIME: if image and image.media_type == SVG_MIME:
style = stylizer.style(elem) style = self.stylizer(item).style(elem)
self.rasterize_external(elem, style, item, image) self.rasterize_external(elem, style, item, image)
for elem in xpath(html, '//svg:svg'): for elem in xpath(html, '//svg:svg'):
style = stylizer.style(elem) style = self.stylizer(item).style(elem)
self.rasterize_inline(elem, style, item) self.rasterize_inline(elem, style, item)
def rasterize_inline(self, elem, style, item): def rasterize_inline(self, elem, style, item):

View File

@ -13,7 +13,7 @@ from PyQt4.Qt import (QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt,
ORG_NAME = 'KovidsBrain' ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500' APP_UID = 'libprs500'
from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx, from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx,
config_dir) config_dir, filesystem_encoding)
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.utils.date import UNDEFINED_DATE from calibre.utils.date import UNDEFINED_DATE
@ -742,6 +742,35 @@ class Application(QApplication):
'pyd' if iswindows else 'so')) 'pyd' if iswindows else 'so'))
pi.load_style(path, 'Calibre') pi.load_style(path, 'Calibre')
self.setPalette(orig_pal) self.setPalette(orig_pal)
style = self.style()
icon_map = {}
pcache = {}
for k, v in {
'DialogYesButton': u'ok.png',
'DialogNoButton': u'window-close.png',
'DialogCloseButton': u'window-close.png',
'DialogOkButton': u'ok.png',
'DialogCancelButton': u'window-close.png',
'DialogHelpButton': u'help.png',
'DialogOpenButton': u'document_open.png',
'DialogSaveButton': u'save.png',
'DialogApplyButton': u'ok.png',
'DialogDiscardButton': u'trash.png',
'MessageBoxInformation': u'dialog_information.png',
'MessageBoxWarning': u'dialog_warning.png',
'MessageBoxCritical': u'dialog_error.png',
'MessageBoxQuestion': u'dialog_question.png',
}.iteritems():
if v not in pcache:
p = I(v)
if isinstance(p, bytes):
p = p.decode(filesystem_encoding)
# if not os.path.exists(p): raise ValueError(p)
pcache[v] = p
v = pcache[v]
icon_map[type('')(getattr(style, 'SP_'+k))] = v
style.setProperty(u'calibre_icon_map', icon_map)
self.__icon_map_memory_ = icon_map
def setup_styles(self, force_calibre_style): def setup_styles(self, force_calibre_style):
self.original_font = QFont(QApplication.font()) self.original_font = QFont(QApplication.font())

View File

@ -100,19 +100,6 @@ class MetadataSingleDialogBase(ResizableDialog):
geom = gprefs.get('metasingle_window_geometry3', None) geom = gprefs.get('metasingle_window_geometry3', None)
if geom is not None: if geom is not None:
self.restoreGeometry(bytes(geom)) self.restoreGeometry(bytes(geom))
self.title.resizeEvent = self.fix_push_buttons
def fix_push_buttons(self, *args):
# Ensure all PushButtons stay the same consistent height throughout this
# dialog. Without this, the buttons inside scrollareas get shrunk,
# while the buttons outside them do not, leading to weirdness.
# Further, buttons with and without icons have different minimum sizes
# so things look even more out of whack.
ht = self.title.height() + 2
for but in self.findChildren(QPushButton):
but.setMaximumHeight(ht)
but.setMinimumHeight(ht)
return TitleEdit.resizeEvent(self.title, *args)
# }}} # }}}
def create_basic_metadata_widgets(self): # {{{ def create_basic_metadata_widgets(self): # {{{
@ -525,6 +512,8 @@ class MetadataSingleDialogBase(ResizableDialog):
' [Alt+Left]')%prev ' [Alt+Left]')%prev
self.prev_button.setToolTip(tip) self.prev_button.setToolTip(tip)
self.prev_button.setEnabled(prev is not None) self.prev_button.setEnabled(prev is not None)
self.button_box.button(self.button_box.Ok).setDefault(True)
self.button_box.button(self.button_box.Ok).setFocus(Qt.OtherFocusReason)
self(self.db.id(self.row_list[self.current_row])) self(self.db.id(self.row_list[self.current_row]))
def break_cycles(self): def break_cycles(self):
@ -993,7 +982,7 @@ def edit_metadata(db, row_list, current_row, parent=None, view_slot=None,
return d.changed, d.rows_to_refresh return d.changed, d.rows_to_refresh
if __name__ == '__main__': if __name__ == '__main__':
from PyQt4.Qt import QApplication from calibre.gui2 import Application as QApplication
app = QApplication([]) app = QApplication([])
from calibre.library import db as db_ from calibre.library import db as db_
db = db_() db = db_()

View File

@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog, from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog,
QAbstractListModel, Qt, QIcon, QKeySequence, QStyleFactory) QAbstractListModel, Qt, QIcon, QKeySequence)
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList
from calibre.gui2.preferences.look_feel_ui import Ui_Form from calibre.gui2.preferences.look_feel_ui import Ui_Form
@ -104,11 +104,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('widget_style', gprefs, restart_required=True, choices= r('widget_style', gprefs, restart_required=True, choices=
[(_('System default'), 'system'), (_('Calibre style'), [(_('System default'), 'system'), (_('Calibre style'),
'calibre')]) 'calibre')])
styles = set(map(unicode, QStyleFactory.keys()))
if 'QtCurve' not in styles:
# Can happen in linux
for x in ('opt', 'label'):
getattr(self, x+'_widget_style').setVisible(False)
r('cover_flow_queue_length', config, restart_required=True) r('cover_flow_queue_length', config, restart_required=True)

View File

@ -206,12 +206,12 @@ class Preferences(QMainWindow):
self.cw.layout().addWidget(self.stack) self.cw.layout().addWidget(self.stack)
self.bb = QDialogButtonBox(QDialogButtonBox.Close) self.bb = QDialogButtonBox(QDialogButtonBox.Close)
self.wizard_button = self.bb.addButton(_('Run welcome wizard'), self.wizard_button = self.bb.addButton(_('Run welcome wizard'),
self.bb.DestructiveRole) self.bb.ActionRole)
self.wizard_button.setIcon(QIcon(I('wizard.png'))) self.wizard_button.setIcon(QIcon(I('wizard.png')))
self.wizard_button.clicked.connect(self.run_wizard, self.wizard_button.clicked.connect(self.run_wizard,
type=Qt.QueuedConnection) type=Qt.QueuedConnection)
self.bb.button(self.bb.Close).setDefault(True)
self.cw.layout().addWidget(self.bb) self.cw.layout().addWidget(self.bb)
self.bb.button(self.bb.Close).setDefault(True)
self.bb.rejected.connect(self.close, type=Qt.QueuedConnection) self.bb.rejected.connect(self.close, type=Qt.QueuedConnection)
self.setCentralWidget(self.cw) self.setCentralWidget(self.cw)
self.browser = Browser(self) self.browser = Browser(self)
@ -381,8 +381,8 @@ class Preferences(QMainWindow):
return QMainWindow.closeEvent(self, *args) return QMainWindow.closeEvent(self, *args)
if __name__ == '__main__': if __name__ == '__main__':
from PyQt4.Qt import QApplication from calibre.gui2 import Application
app = QApplication([]) app = Application([])
app app
gui = init_gui() gui = init_gui()

View File

@ -42,7 +42,7 @@ class ProceedQuestion(QDialog):
ic.setMaximumHeight(100) ic.setMaximumHeight(100)
ic.setScaledContents(True) ic.setScaledContents(True)
ic.setStyleSheet('QLabel { margin-right: 10px }') ic.setStyleSheet('QLabel { margin-right: 10px }')
self.bb = QDialogButtonBox(QDialogButtonBox.Yes|QDialogButtonBox.No) self.bb = QDialogButtonBox()
self.bb.accepted.connect(self.accept) self.bb.accepted.connect(self.accept)
self.bb.rejected.connect(self.reject) self.bb.rejected.connect(self.reject)
self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole) self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
@ -59,6 +59,7 @@ class ProceedQuestion(QDialog):
_('Show detailed information about this error')) _('Show detailed information about this error'))
self.det_msg = QPlainTextEdit(self) self.det_msg = QPlainTextEdit(self)
self.det_msg.setReadOnly(True) self.det_msg.setReadOnly(True)
self.bb.setStandardButtons(self.bb.Yes|self.bb.No)
self.bb.button(self.bb.Yes).setDefault(True) self.bb.button(self.bb.Yes).setDefault(True)
l.addWidget(ic, 0, 0, 1, 1) l.addWidget(ic, 0, 0, 1, 1)
@ -121,10 +122,10 @@ class ProceedQuestion(QDialog):
self.det_msg.setVisible(False) self.det_msg.setVisible(False)
self.det_msg_toggle.setVisible(bool(question.det_msg)) self.det_msg_toggle.setVisible(bool(question.det_msg))
self.det_msg_toggle.setText(self.show_det_msg) self.det_msg_toggle.setText(self.show_det_msg)
self.bb.button(self.bb.Yes).setDefault(True)
self.do_resize() self.do_resize()
self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason)
self.show() self.show()
self.bb.button(self.bb.Yes).setDefault(True)
self.bb.button(self.bb.Yes).setFocus(Qt.OtherFocusReason)
def __call__(self, callback, payload, html_log, log_viewer_title, title, def __call__(self, callback, payload, html_log, log_viewer_title, title,
msg, det_msg='', show_copy_button=False, cancel_callback=None, msg, det_msg='', show_copy_button=False, cancel_callback=None,
@ -164,7 +165,14 @@ class ProceedQuestion(QDialog):
self.log_viewer = ViewLog(q.log_viewer_title, log, self.log_viewer = ViewLog(q.log_viewer_title, log,
parent=self) parent=self)
if __name__ == '__main__': def main():
app = QApplication([]) from calibre.gui2 import Application
ProceedQuestion(None).exec_() app = Application([])
p = ProceedQuestion(None)
p(lambda p:None, None, 'ass', 'ass', 'testing', 'testing')
p.exec_()
app
if __name__ == '__main__':
main()

View File

@ -104,7 +104,7 @@ class TagsView(QTreeView): # {{{
self.setStyleSheet(''' self.setStyleSheet('''
QTreeView { QTreeView {
background-color: palette(window); background-color: palette(window);
color: palette(text); color: palette(window-text);
border: none; border: none;
} }
@ -117,7 +117,7 @@ class TagsView(QTreeView): # {{{
QTreeView::item:hover { QTreeView::item:hover {
background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1); background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1);
border: 1px solid #bfcde4; border: 1px solid #bfcde4;
border-radius: 8px; border-radius: 6px;
} }
''') ''')

View File

@ -20,7 +20,7 @@ class TOCView(QTreeView):
self.setStyleSheet(''' self.setStyleSheet('''
QTreeView { QTreeView {
background-color: palette(window); background-color: palette(window);
color: palette(text); color: palette(window-text);
border: none; border: none;
} }
QTreeView::item { QTreeView::item {
@ -32,7 +32,7 @@ class TOCView(QTreeView):
QTreeView::item:hover { QTreeView::item:hover {
background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1); background: qlineargradient(x1: 0, y1: 0, x2: 0, y2: 1, stop: 0 #e7effd, stop: 1 #cbdaf1);
border: 1px solid #bfcde4; border: 1px solid #bfcde4;
border-radius: 8px; border-radius: 6px;
} }
QHeaderView::section { QHeaderView::section {
background-color: qlineargradient(x1:0, y1:0, x2:0, y2:1, background-color: qlineargradient(x1:0, y1:0, x2:0, y2:1,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More