Sync to trunk.

John Schember, 2011-08-25 18:28:25 -04:00
commit b261ba7ef3
106 changed files with 2674943 additions and 2274 deletions


@@ -9,6 +9,12 @@ License: GPL-2 or later
 The full text of the GPL is distributed as in
 /usr/share/common-licenses/GPL-2 on Debian systems.
+Files: setup/iso_639/*
+Copyright: Various
+License: LGPL 2.1
+The full text of the LGPL is distributed as in
+/usr/share/common-licenses/LGPL-2.1 on Debian systems.
 Files: src/calibre/ebooks/BeautifulSoup.py
 Copyright: Copyright (c) 2004-2007, Leonard Richardson
 License: BSD
@@ -28,6 +34,12 @@ License: other
 are permitted in any medium without royalty provided the copyright
 notice and this notice are preserved.
+Files: src/calibre/ebooks/readability/*
+Copyright: Unknown
+License: Apache 2.0
+The full text of the Apache 2.0 license is available at:
+http://www.apache.org/licenses/LICENSE-2.0
 Files: /src/cherrypy/*
 Copyright: Copyright (c) 2004-2007, CherryPy Team (team@cherrypy.org)
 Copyright: Copyright (C) 2005, Tiago Cogumbreiro <cogumbreiro@users.sf.net>


@@ -28,11 +28,12 @@ class CNN(BasicNewsRecipe):
         (re.compile(r'<style.*?</style>', re.DOTALL), lambda m: ''),
     ]
-    keep_only_tags = [dict(id='cnnContentContainer')]
+    keep_only_tags = [dict(id=['cnnContentContainer', 'storycontent'])]
     remove_tags = [
         {'class':['cnn_strybtntools', 'cnn_strylftcntnt',
             'cnn_strybtntools', 'cnn_strybtntoolsbttm', 'cnn_strybtmcntnt',
-            'cnn_strycntntrgt']},
+            'cnn_strycntntrgt', 'hed_side', 'foot']},
+        dict(id=['ie_column']),
     ]


@@ -1,5 +1,3 @@
-#import re   # Provides preprocess_regexps re.compile
 from calibre.web.feeds.news import BasicNewsRecipe

 class FairbanksDailyNewsminer(BasicNewsRecipe):
@@ -8,21 +6,28 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
     oldest_article = 7
     max_articles_per_feed = 100
-    description = ''''The voice of interior Alaska since 1903'''
+    description = 'The voice of interior Alaska since 1903'
     publisher = 'http://www.newsminer.com/'
     category = 'news, Alaska, Fairbanks'
     language = 'en'
-    #extra_css = '''
-    #    p{font-weight: normal;text-align: justify}
-    #    '''
+    # Make article titles, author and date bold, italic or small font.
+    # http://assets.matchbin.com/sites/635/stylesheets/newsminer.com.css
+    # (signature_line contains date, views, comments)
+    extra_css = '''
+        .story_item_headline { font-size: medium; font-weight: bold; }
+        .story_item_author { font-size: small; font-style:italic; }
+        .signature_line { font-size: small; }
+        '''
     remove_javascript = True
     use_embedded_content = False
     no_stylesheets = True
     language = 'en'
     encoding = 'utf8'
     conversion_options = {'linearize_tables':True}
+    # TODO: I don't see any photos in my Mobi file with this masthead_url!
+    # TODO: The News-miner cover image seems a bit small. Can this be enlarged by 10-30%?
     masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'
@@ -31,6 +36,10 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
     # manual processing is needed to get just the "story_item_date updated"
     # (which contains the date). Everything else on this line is pretty much not needed.
     #
+    # Currently, you will see the following:
+    #     | Aug 24, 2011 | 654 views | 6 | |
+    # (i.e. 6 comments)
+    #
     # HTML line containing story_item_date:
     # <div class="signature_line"><span title="2011-08-22T23:37:14Z" class="story_item_date updated">Aug 22, 2011</span>&nbsp;|&nbsp;2370&nbsp;views&nbsp;|&nbsp;52&nbsp;<a href="/pages/full_story/push?article-Officials+tout+new+South+Cushman+homeless+living+facility%20&id=15183753#comments_15183753"><img alt="52 comments" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/comments-icon.gif" title="52 comments" /></a>&nbsp;|&nbsp;<span id="number_recommendations_15183753" class="number_recommendations">9</span>&nbsp;<a href="#1" id="recommend_link_15183753" onclick="Element.remove('recommend_link_15183753'); new Ajax.Request('/community/content/recommend/15183753', {asynchronous:true, evalScripts:true}); return false;"><img alt="9 recommendations" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/thumbs-up-icon.gif" title="9 recommendations" /></a>&nbsp;|&nbsp;<a href="#1" onclick="$j.facebox({ajax: '/community/content/email_friend_pane/15183753'}); return false;"><span style="position: relative;"><img alt="email to a friend" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/email-this.gif" title="email to a friend" /></span></a>&nbsp;|&nbsp;<span><a href="/printer_friendly/15183753" target="_blank"><img alt="print" class="dont_touch_me" src="http://d2uh5w9wm14i0w.cloudfront.net/images/print_icon.gif" title="print" /></a></span><span id="email_content_message_15183753" class="signature_email_message"></span></div>
@@ -40,73 +49,49 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
     #preprocess_regexps = [(re.compile(r'<span[^>]*addthis_separator*>'), lambda match: '') ]
     #preprocess_regexps = [(re.compile(r'span class="addthis_separator">|</span>'), lambda match: '') ]
     #preprocess_regexps = [
     #    (re.compile(r'<start>.*?<end>', re.IGNORECASE | re.DOTALL), lambda match : ''),
     #    ]
     #def get_browser(self):
     #def preprocess_html(soup, first_fetch):
     #    date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
     #    return
-    #preprocess_regexps = [(re.compile(r'&nbsp;|.*?', re.DOTALL), lambda m: '')]
-    # Try to keep some tags - some might not be needed here
     keep_only_tags = [
-        #date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'})),
-        dict(name='div', attrs={'class':'hnews hentry item'}),
+        #dict(name='div', attrs={'class':'hnews hentry item'}),
         dict(name='div', attrs={'class':'story_item_headline entry-title'}),
-        #dict(name='div', attrs={'class':'story_item_author'}),
         #dict(name='span', attrs={'class':'story_item_date updated'}),
+        #dict(name='div', attrs={'class':'story_item_author'}),
         dict(name='div', attrs={'class':'full_story'})
         ]
-    #remove_tags = [
-    #    dict(name='div', attrs={'class':'story_tools'}),
-    #    dict(name='p', attrs={'class':'ad_label'}),
-    #    ]
-    # Try to remove some bothersome tags
     remove_tags = [
-        # Try getting rid of some signature_line (date line) stuff
         #dict(name='img', attrs={'alt'}),
         dict(name='img', attrs={'class':'dont_touch_me'}),
         dict(name='span', attrs={'class':'number_recommendations'}),
         #dict(name='div', attrs={'class':'signature_line'}),
-        # Removes div within <!-- AddThis Button BEGIN --> <!-- AddThis Button END -->
         dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
-        dict(name='div', attrs={'class':['addthis_toolbox','addthis_default_style']}),
-        dict(name='span', attrs={'class':'addthis_separator'}),
         dict(name='div', attrs={'class':'related_content'}),
-        dict(name='div', attrs={'class':'comments_container'}),
-        #dict(name='div', attrs={'class':'signature_line'}),
-        dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
         dict(name='div', attrs={'id':'comments_container'})
         ]
-    # This one works but only gets title, date and clips article content!
-    #remove_tags_after = [
-    #    dict(name='span', attrs={'class':'story_item_date updated'})
-    #    ]
-    #remove_tags_after = [
-    #    dict(name='div', attrs={'class':'advertisement'}),
-    #    ]
-    # Try clipping tags before and after to prevent pulling img views/posts numbers after date?
-    #remove_tags_before = [
-    #    dict(name='span', attrs={'class':'story_item_date updated'})
-    #    ]
-    #extra_css   # tweak the appearance
+    # TODO: Change article titles <h2?> to bold?
     # Comment-out or uncomment any of the following RSS feeds according to your
     # liking.
     #
-    # TODO: Adding more then one RSS Feed, and newline will be omitted for
-    # entries within the Table of Contents or Index of Articles
-    #
-    # TODO: Some random bits of text is trailing the last page (or TOC on MOBI
-    # files), these are bits of public posts and comments and need to also be
-    # removed.
+    # TODO: Some random bits of text might be trailing the last page (or TOC on
+    # MOBI files), these are bits of public posts and comments and need to also
+    # be removed.
     #
     feeds = [
         (u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'),
@@ -114,15 +99,15 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
         (u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'),
         (u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
         (u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
-        # (u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
+        (u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
         (u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
-        # (u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
-        # (u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
+        (u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
+        #(u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
         (u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
-        # (u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
-        # (u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
-        # (u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
-        # (u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),
-        (u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin')
+        (u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
+        (u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
+        #(u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
+        (u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),
+        #(u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin')
         ]
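Not part of the commit: a minimal sketch of how the signature_line clutter (views, comment and recommendation icons) that the remove_tags entries above chip away at could instead be trimmed in one place, using the same BeautifulSoup calls the other recipes in this commit use (preprocess_html, findAll, replaceWith); the selectors are the classes quoted in the comments above.

    # Hypothetical alternative, not in this recipe: keep only the date span
    # from each signature_line div and drop the rest of the line.
    def preprocess_html(self, soup):
        for sig in soup.findAll('div', attrs={'class': 'signature_line'}):
            date = sig.find('span', attrs={'class': 'story_item_date updated'})
            if date is not None:
                sig.replaceWith(date)
            else:
                sig.extract()
        return soup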

recipes/hackernews.recipe (new file)

@@ -0,0 +1,86 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
Hacker News
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from urlparse import urlparse
class HackerNews(BasicNewsRecipe):
title = 'Hacker News'
__author__ = 'Tom Scholl'
description = u'Hacker News, run by Y Combinator. Anything that good hackers would find interesting, with a focus on programming and startups.'
publisher = 'Y Combinator'
category = 'news, programming, it, technology'
masthead_url = 'http://i55.tinypic.com/2u6io76.png'
cover_url = 'http://i55.tinypic.com/2u6io76.png'
delay = 1
max_articles_per_feed = 30
use_embedded_content = False
no_stylesheets = True
encoding = 'utf-8'
language = 'en'
requires_version = (0,8,16)
feeds = [
(u'Hacker News', 'http://news.ycombinator.com/rss')
]
temp_files = []
articles_are_obfuscated = True
def get_readable_content(self, url):
self.log('get_readable_content(' + url + ')')
br = self.get_browser()
f = br.open(url)
html = f.read()
f.close()
return self.extract_readable_article(html, url)
def get_hn_content(self, url):
self.log('get_hn_content(' + url + ')')
# this could be improved
br = self.get_browser()
f = br.open(url)
html = f.read()
f.close()
return html
def get_obfuscated_article(self, url):
if url.startswith('http://news.ycombinator.com'):
content = self.get_hn_content(url)
else:
# TODO: use content-type header instead of url
is_image = False
for ext in ['.jpg', '.png', '.svg', '.gif', '.jpeg', '.tiff', '.bmp',]:
if url.endswith(ext):
is_image = True
break
if is_image:
self.log('using image_content (' + url + ')')
content = u'<html><body><img src="' + url + u'"></body></html>'
else:
content = self.get_readable_content(url)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(content)
self.temp_files[-1].close()
return self.temp_files[-1].name
def is_link_wanted(self, url, tag):
if url.endswith('.pdf'):
return False
return True
def prettyify_url(self, url):
return urlparse(url).hostname
def populate_article_metadata(self, article, soup, first):
article.text_summary = self.prettyify_url(article.url)
article.summary = article.text_summary


@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class SHaber (BasicNewsRecipe):
title = u'Samanyolu Haber'
__author__ = u'thomass'
description = ' Samanyolu Haber Sitesinden günlük haberler '
oldest_article =2
max_articles_per_feed =100
no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'utf-8'
publisher = 'thomass'
category = 'güncel, haber, türkçe'
language = 'tr'
publication_type = 'newspaper'
conversion_options = {
'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
extra_css = ' .Haber-Baslik-Yazisi {font-weight: bold; font-size: 9px} .Haber-Ozet-Yazisi{ font-family:sans-serif;font-weight: normal;font-size: 11px } #Haber{ font-family:sans-serif;font-weight: normal;font-size: 9px }.KirmiziText{ font-weight: normal;font-size: 5px }' #.story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
keep_only_tags = [dict(name='div', attrs={'class':['Haber-Baslik-Yazisi','Haber-Ozet-Yazisi']}),dict(name='div', attrs={'id':['ctl00_ContentPlaceHolder1_imagenew','Haber']})]#,dict(name='h6', attrs={'class':['KirmiziText',]}) dict(name='div', attrs={'id':['Haber']}),dict(name='div', attrs={'id':['gallery']})]
#remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ],dict(name='h1', attrs={'class':['H1-Haber-DetayBasligi']}),dict(name='h4', attrs={'class':['BrownText']}) ,
cover_img_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
masthead_url = 'http://static.samanyoluhaber.com/Images/resources/images/samanyoluhaber-yazi-logo.png'
remove_empty_feeds= True
#remove_attributes = ['width','height']
feeds = [
( u'Son Dakika', u'http://podcast.samanyoluhaber.com/sondakika.rss'),
( u'Gündem', u'http://podcast.samanyoluhaber.com/gundem.rss'),
( u'Politika ', u'http://podcast.samanyoluhaber.com/politika.rss'),
( u'Ekonomi', u'http://podcast.samanyoluhaber.com/ekonomi.rss'),
( u'Dünya', u'http://podcast.samanyoluhaber.com/dunya.rss'),
( u'Spor ', u'http://podcast.samanyoluhaber.com/spor.rss'),
( u'Sağlık', u'http://podcast.samanyoluhaber.com/saglik.rss'),
( u'Kültür', u'http://podcast.samanyoluhaber.com/kultur.rss'),
#( u'Teknoloji ', u'http://podcast.samanyoluhaber.com/teknoloji.rss'),
( u'Eğitim', u'http://podcast.samanyoluhaber.com/egitim.rss'),
( u'Ramazan', u'http://podcast.samanyoluhaber.com/ramazan.rss'),
( u'Yazarlar ', u'http://podcast.samanyoluhaber.com/yazarlar.rss'),
]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
# def print_version(self, url):
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')


@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class SHaberTekno (BasicNewsRecipe):
title = u'Samanyolu Teknoloji'
__author__ = u'thomass'
description = 'Samanyolu Teknoloji Haber Sitesinden haberler '
oldest_article =8
max_articles_per_feed =100
no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'utf-8'
publisher = 'thomass'
category = 'bilim, teknoloji, haber, türkçe'
language = 'tr'
publication_type = 'magazine'
conversion_options = {
'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
extra_css = ' .IcerikMetin{ font-family:sans-serif;font-weight: normal;font-size: 10px } .h1IcerikBaslik {font-weight: bold; font-size: 18px}' #.story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
keep_only_tags = [dict(name='div', attrs={'class':['IcerikBaslik','IcerikMetinDiv']})]#,dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
#remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ]
cover_img_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg'
masthead_url = 'http://teknoloji.samanyoluhaber.com/resources/images/logo_s_digi.jpg'
remove_empty_feeds= True
#remove_attributes = ['width','height']
feeds = [
( u'GENEL', u'http://podcast.samanyoluhaber.com/Teknoloji.rss'),
( u'İNTERNET', u'http://open.dapper.net/services/shaberteknolojiinternet'),
( u'CEP TELEFONU', u'http://open.dapper.net/services/shaberteknolojicep'),
( u'OYUN', u'http://open.dapper.net/services/shaberteknolojioyun'),
( u'DONANIM', u'http://open.dapper.net/services/httpopendappernetservicesshaberteknolojidonanim'),
( u'ÜRÜN İNCELEME', u'http://open.dapper.net/services/shaberteknolojiurun'),
( u'ALIŞVERİŞ', u'http://open.dapper.net/services/shaberteknolojialisveris'),
( u'BİLİM & TEKNOLOJİ', u'http://open.dapper.net/services/shaberteknolojibilim'),
( u'HABERLER', u'http://open.dapper.net/services/shaberteknolojihaber'),
]
# def print_version(self, url):
# return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')


@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Star (BasicNewsRecipe):
title = u'Star Gazetesi'
__author__ = u'thomass'
description = "yeni Türkiye'nin Gazetesi"
oldest_article =2
max_articles_per_feed =100
no_stylesheets = True
#delay = 1
#use_embedded_content = False
encoding = 'utf-8'
publisher = 'thomass'
category = 'güncel, haber, türkçe'
language = 'tr'
publication_type = 'newspaper'
conversion_options = {
'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
extra_css = ' .font8{font-weight: bold; font-size:20px}.font11{font-weight: normal; font-size:small}#hdetay{ font-family:sans-serif;font-size: 9px }' #.story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
keep_only_tags = [dict(name='div', attrs={'class':['font8']}),dict(name='span', attrs={'class':['font11']}),dict(name='div', attrs={'id':['hdetay']})]#,,dict(name='h6', attrs={'class':['KirmiziText']}) dict(name='div', attrs={'id':['Haber']}),dict(name='div', attrs={'id':['gallery']})]
#remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ],dict(name='h1', attrs={'class':['H1-Haber-DetayBasligi']}),dict(name='h4', attrs={'class':['BrownText']}) ,
cover_img_url = 'http://www.stargazete.com/starnew/img/starlogo.png'
masthead_url = 'http://www.stargazete.com/starnew/img/starlogo.png'
remove_empty_feeds= True
#remove_attributes = ['width','height']
feeds = [
( u'MANSET', u'http://open.dapper.net/services/starmanset'),
( u'GÜNCEL', u'http://www.stargazete.com/guncel.xml'),
( u'POLİTİKA', u'http://www.stargazete.com/politika.xml'),
( u' EKONOMİ', u'http://www.stargazete.com/ekonomi.xml'),
( u'DÜNYA', u'http://www.stargazete.com/dunya.xml'),
( u'YAZARLAR', u'http://www.stargazete.com/gazeteyazarlar.xml'),
( u'SPOR', u'http://www.stargazete.com/spor.xml'),
( u'SPOR YAZARLARI', u'http://www.stargazete.com/index.php?metot=rss&islem=sporyazarlar'),
( u'SİNEMA', u'http://www.stargazete.com/sinema.xml'),
( u'KADIN&SAĞLIK', u'http://www.stargazete.com/kadinsaglik.xml'),
( u' STARTEK', u'http://www.stargazete.com/startek.xml'),
( u' AÇIK GÖRÜŞ', u'http://www.stargazete.com/acikgorus.xml'),
( u'Star PAZAR', u'http://www.stargazete.com/pazar.xml'),
( u'Star CUMARTESİ', u'http://www.stargazete.com/cumartesi.xml'),
]
def preprocess_html(self, soup):#remove links
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
#def print_version(self, url):
#return url.replace('/', 'http://www.stargazete.com/')

File diff suppressed because it is too large.

setup/iso_639/README (new file)

@ -0,0 +1,3 @@
These files are taken from the iso-codes package, licensed under the LGPL 2.1
All the files are from the iso_639_3 sub-directory.

The following new files were added under setup/iso_639/ (taken from the iso-codes package); their diffs are suppressed because they are too large. Added line counts are given in parentheses:

af.po (34266), am.po (33173), ar.po (33365), az.po (33394), bg.po (30849), bn_IN.po (33998), br.po (30899), bs.po (33078), byn.po (33173), ca.po (33938),
crh.po (30926), cs.po (33542), cy.po (34256), da.po (34952), de.po (35241), el.po (32435), eo.po (34969), es.po (34978), et.po (33392), eu.po (33195),
fa.po (34699), fi.po (35015), fr.po (30860), ga.po (35031), gez.po (33173), gl.po (30865), gu.po (30893), he.po (33228), hi.po (33178), hr.po (33298),
hu.po (35252), id.po (34070), is.po (33229), iso_639_3.xml (39178), it.po (30851), ja.po (43196), kn.po (30927), ko.po (33380), kok.po (33153), lt.po (30897),
lv.po (33227), mi.po (33247), mk.po (33236), mn.po (34585), mr.po (30867), ms.po (33374), mt.po (34967), nb.po (34114), nl.po (34201), nn.po (33810),
nso.po (33062), oc.po (30863), or.po (30863), pa.po (30932), pl.po (30850), ps.po (31615), pt.po (34936), pt_BR.po (32497), ro.po (33391), ru.po (34947),
rw.po (34991), sk.po (33228), sl.po (34187), sr.po (35064), sr@latin.po (35064), sv.po (35702), ta.po (30933), th.po (30938), ti.po (33173), tig.po (33173),
tr.po (34572), tt.po (33506), uk.po (30851), ve.po (34473), vi.po (34958), wa.po (34192), xh.po (33142), zh_CN.po (33214), zh_TW.po (33204), zu.po (33228)


@@ -7,7 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, tempfile, shutil, subprocess, glob, re, time, textwrap
-from distutils import sysconfig
 from functools import partial

 from setup import Command, __appname__, __version__
@@ -142,19 +141,18 @@ class Translations(POT): # {{{
                 os.makedirs(base)
             self.info('\tCompiling translations for', locale)
             subprocess.check_call(['msgfmt', '-o', dest, f])
-            if locale in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', 'ltg', 'nds', 'te', 'yi'):
-                continue
-            pycountry = self.j(sysconfig.get_python_lib(), 'pycountry',
-                    'locales', locale, 'LC_MESSAGES')
-            if os.path.exists(pycountry):
-                iso639 = self.j(pycountry, 'iso639.mo')
-                dest = self.j(self.d(dest), self.b(iso639))
-                if self.newer(dest, iso639) and os.path.exists(iso639):
+            iscpo = {'bn':'bn_IN', 'zh_HK':'zh_CN'}.get(locale, locale)
+            iso639 = self.j(self.d(self.SRC), 'setup', 'iso_639',
+                    '%s.po'%iscpo)
+            if os.path.exists(iso639):
+                dest = self.j(self.d(dest), 'iso639.mo')
+                if self.newer(dest, iso639):
                     self.info('\tCopying ISO 639 translations')
-                    shutil.copy2(iso639, dest)
-            else:
-                self.warn('No ISO 639 translations for locale:', locale,
-                    '\nDo you have pycountry installed?')
+                    subprocess.check_call(['msgfmt', '-o', dest, iso639])
+            elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
+                    'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml'):
+                self.warn('No ISO 639 translations for locale:', locale)

         self.write_stats()
         self.freeze_locales()
@@ -212,7 +210,7 @@ class Translations(POT): # {{{
 # }}}

-class GetTranslations(Translations):
+class GetTranslations(Translations): # {{{
     description = 'Get updated translations from Launchpad'
     BRANCH = 'lp:~kovid/calibre/translations'
@@ -273,24 +271,25 @@ class GetTranslations(Translations):
         subprocess.check_call(['bzr', 'commit', '-m',
             'IGN:Translation corrections', cls.PATH])
+# }}}

-class ISO639(Command):
+class ISO639(Command): # {{{

     description = 'Compile translations for ISO 639 codes'
     DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
             'iso639.pickle')

     def run(self, opts):
-        src = self.j(self.d(self.SRC), 'setup', 'iso639.xml')
+        src = self.j(self.d(self.SRC), 'setup', 'iso_639')
         if not os.path.exists(src):
             raise Exception(src + ' does not exist')
         dest = self.DEST
-        if not self.newer(dest, src):
+        if not self.newer(dest, [src, __file__]):
             self.info('Pickled code is up to date')
             return
         self.info('Pickling ISO-639 codes to', dest)
         from lxml import etree
-        root = etree.fromstring(open(src, 'rb').read())
+        root = etree.fromstring(open(self.j(src, 'iso_639_3.xml'), 'rb').read())
         by_2 = {}
         by_3b = {}
         by_3t = {}
@@ -298,12 +297,16 @@ class ISO639(Command):
         m3to2 = {}
         m3bto3t = {}
         nm = {}
-        codes2, codes3t, codes3b = set([]), set([]), set([])
-        for x in root.xpath('//iso_639_entry'):
+        codes2, codes3t, codes3b = set(), set(), set()
+        for x in root.xpath('//iso_639_3_entry'):
+            two = x.get('part1_code', None)
+            threet = x.get('id')
+            threeb = x.get('part2_code', None)
+            if threeb is None:
+                # Only recognize languages in ISO-639-2
+                continue
             name = x.get('name')
-            two = x.get('iso_639_1_code', None)
-            threeb = x.get('iso_639_2B_code')
-            threet = x.get('iso_639_2T_code')
             if two is not None:
                 by_2[two] = name
                 codes2.add(two)
@@ -313,13 +316,10 @@ class ISO639(Command):
             by_3t[threet] = name
             if threeb != threet:
                 m3bto3t[threeb] = threet
-            codes3b.add(x.get('iso_639_2B_code'))
-            codes3t.add(x.get('iso_639_2T_code'))
+            codes3b.add(threeb)
+            codes3t.add(threet)
             base_name = name.lower()
             nm[base_name] = threet
-            simple_name = base_name.partition(';')[0].strip()
-            if simple_name not in nm:
-                nm[simple_name] = threet

         from cPickle import dump
         x = {'by_2':by_2, 'by_3b':by_3b, 'by_3t':by_3t, 'codes2':codes2,
@@ -331,4 +331,5 @@ class ISO639(Command):
         if os.path.exists(self.DEST):
             os.remove(self.DEST)
+# }}}


@@ -19,11 +19,12 @@ class ANDROID(USBMS):
     VENDOR_ID = {
             # HTC
             0x0bb4 : { 0xc02 : [0x100, 0x0227, 0x0226, 0x222],
                        0xc01 : [0x100, 0x0227, 0x0226],
                        0xff9 : [0x0100, 0x0227, 0x0226],
+                       0xc86 : [0x100, 0x0227, 0x0226, 0x222],
                        0xc87 : [0x0100, 0x0227, 0x0226],
                        0xc91 : [0x0100, 0x0227, 0x0226],
                        0xc92 : [0x100, 0x0227, 0x0226, 0x222],
                        0xc97 : [0x100, 0x0227, 0x0226, 0x222],
                        0xc99 : [0x100, 0x0227, 0x0226, 0x222],
@@ -124,7 +125,7 @@ class ANDROID(USBMS):
     VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
             'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
             'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
-            'GENERIC-', 'ZTE', 'MID']
+            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM']

     WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
             '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
             'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',


@@ -85,7 +85,8 @@ class Serializer(object):
             spine_item(item).is_section_start = True
             for i, article in enumerate(articles):
                 si = spine_item(article)
-                si.is_article_start = True
+                if si is not None:
+                    si.is_article_start = True

         items = list(self.oeb.spine)
         in_sec = in_art = False
@@ -116,7 +117,7 @@ class Serializer(object):
         buf.write(b'</html>')
         self.end_offset = buf.tell()
         self.fixup_links()
-        if self.start_offset is None:
+        if self.start_offset is None and not self.is_periodical:
             # If we don't set a start offset, the stupid Kindle will
             # open the book at the location of the first IndexEntry, which
             # could be anywhere. So ensure the book is always opened at the


@@ -885,7 +885,8 @@ class Manifest(object):
             try:
                 data = etree.fromstring(data, parser=parser)
             except etree.XMLSyntaxError as err:
-                self.oeb.log.exception('Initial parse failed:')
+                self.oeb.log.debug('Initial parse failed, using more'
+                        ' forgiving parsers')
                 repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
                 data = ENTITY_RE.sub(repl, data)
                 try:


@@ -0,0 +1,37 @@
This code is under the Apache License 2.0. http://www.apache.org/licenses/LICENSE-2.0
This is a python port of a ruby port of arc90's readability project, taken
from https://github.com/buriy/python-readability
The original readability project:
http://lab.arc90.com/experiments/readability/
In a few words:
Given an HTML document, it pulls out the main body text and cleans it up.
It can also clean up the title, based on the latest readability.js code.
Based on:
- Latest readability.js ( https://github.com/MHordecki/readability-redux/blob/master/readability/readability.js )
- Ruby port by starrhorne and iterationlabs
- Python port by gfxmonk ( https://github.com/gfxmonk/python-readability , based on BeautifulSoup )
- Decruft effort to move to lxml ( http://www.minvolai.com/blog/decruft-arc90s-readability-in-python/ )
- "BR to P" fix from readability.js which improves quality for smaller texts.
- Github users contributions.
Installation::
easy_install readability-lxml
or
pip install readability-lxml
Usage::
from readability.readability import Document
import urllib
html = urllib.urlopen(url).read()
readable_article = Document(html).summary()
readable_title = Document(html).short_title()
Command-line usage::
python -m readability.readability -u http://pypi.python.org/pypi/readability-lxml
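For the copy bundled in this commit, the import path differs from the standalone package and the Document constructor also takes a log object. A minimal sketch, assuming calibre.utils.logging.default_log and a placeholder URL:

    from calibre.ebooks.readability.readability import Document
    from calibre.utils.logging import default_log
    import urllib

    url = 'http://example.com/some-article'  # placeholder
    html = urllib.urlopen(url).read()
    # 'url' is only used to make relative links absolute
    doc = Document(html, default_log, url=url)
    print doc.short_title()
    print doc.summary()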


@@ -0,0 +1 @@


@@ -0,0 +1,32 @@
# strip out a set of nuisance html attributes that can mess up rendering in RSS feeds
import re
from lxml.html.clean import Cleaner
bad_attrs = ['width', 'height', 'style', '[-a-z]*color', 'background[-a-z]*', 'on*']
single_quoted = "'[^']+'"
double_quoted = '"[^"]+"'
non_space = '[^ "\'>]+'
htmlstrip = re.compile("<" # open
"([^>]+) " # prefix
"(?:%s) *" % ('|'.join(bad_attrs),) + # undesirable attributes
'= *(?:%s|%s|%s)' % (non_space, single_quoted, double_quoted) + # value
"([^>]*)" # postfix
">" # end
, re.I)
def clean_attributes(html):
while htmlstrip.search(html):
html = htmlstrip.sub('<\\1\\2>', html)
return html
def normalize_spaces(s):
if not s: return ''
"""replace any sequence of whitespace
characters with a single space"""
return ' '.join(s.split())
html_cleaner = Cleaner(scripts=True, javascript=True, comments=True,
style=True, links=True, meta=False, add_nofollow=False,
page_structure=False, processing_instructions=True, embedded=False,
frames=False, forms=False, annoying_tags=False, remove_tags=None,
remove_unknown_tags=False, safe_attrs_only=False)
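A small usage sketch (not part of the module above): style and width appear in bad_attrs, so clean_attributes strips them, while class is not listed and survives. The sample markup is made up.

    sample = '<p style="color:red" width="300">Hello <b class="x">world</b></p>'
    print clean_attributes(sample)
    # -> <p>Hello <b class="x">world</b></p>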


@@ -0,0 +1,25 @@
def save_to_file(text, filename):
f = open(filename, 'wt')
f.write('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />')
f.write(text.encode('utf-8'))
f.close()
uids = {}
def describe(node, depth=2):
if not hasattr(node, 'tag'):
return "[%s]" % type(node)
name = node.tag
if node.get('id', ''): name += '#'+node.get('id')
if node.get('class', ''):
name += '.' + node.get('class').replace(' ','.')
if name[:4] in ['div#', 'div.']:
name = name[3:]
if name in ['tr', 'td', 'div', 'p']:
if not node in uids:
uid = uids[node] = len(uids)+1
else:
uid = uids.get(node)
name += "%02d" % (uid)
if depth and node.getparent() is not None:
return name+' - '+describe(node.getparent(), depth-1)
return name


@@ -0,0 +1,103 @@
import re
from lxml.html import tostring
import lxml.html
from calibre.ebooks.readability.cleaners import normalize_spaces, clean_attributes
from calibre.ebooks.chardet import xml_to_unicode
def build_doc(page):
page_unicode = xml_to_unicode(page, strip_encoding_pats=True)[0]
doc = lxml.html.document_fromstring(page_unicode)
return doc
def js_re(src, pattern, flags, repl):
return re.compile(pattern, flags).sub(src, repl.replace('$', '\\'))
def normalize_entities(cur_title):
entities = {
u'\u2014':'-',
u'\u2013':'-',
u'&mdash;': '-',
u'&ndash;': '-',
u'\u00A0': ' ',
u'\u00AB': '"',
u'\u00BB': '"',
u'&quot;': '"',
}
for c, r in entities.iteritems():
if c in cur_title:
cur_title = cur_title.replace(c, r)
return cur_title
def norm_title(title):
return normalize_entities(normalize_spaces(title))
def get_title(doc):
title = doc.find('.//title').text
if not title:
return '[no-title]'
return norm_title(title)
def add_match(collection, text, orig):
text = norm_title(text)
if len(text.split()) >= 2 and len(text) >= 15:
if text.replace('"', '') in orig.replace('"', ''):
collection.add(text)
def shorten_title(doc):
title = doc.find('.//title').text
if not title:
return ''
title = orig = norm_title(title)
candidates = set()
for item in ['.//h1', './/h2', './/h3']:
for e in list(doc.iterfind(item)):
if e.text:
add_match(candidates, e.text, orig)
if e.text_content():
add_match(candidates, e.text_content(), orig)
for item in ['#title', '#head', '#heading', '.pageTitle', '.news_title', '.title', '.head', '.heading', '.contentheading', '.small_header_red']:
for e in doc.cssselect(item):
if e.text:
add_match(candidates, e.text, orig)
if e.text_content():
add_match(candidates, e.text_content(), orig)
if candidates:
title = sorted(candidates, key=len)[-1]
else:
for delimiter in [' | ', ' - ', ' :: ', ' / ']:
if delimiter in title:
parts = orig.split(delimiter)
if len(parts[0].split()) >= 4:
title = parts[0]
break
elif len(parts[-1].split()) >= 4:
title = parts[-1]
break
else:
if ': ' in title:
parts = orig.split(': ')
if len(parts[-1].split()) >= 4:
title = parts[-1]
else:
title = orig.split(': ', 1)[1]
if not 15 < len(title) < 150:
return orig
return title
def get_body(doc):
[ elem.drop_tree() for elem in doc.xpath('.//script | .//link | .//style') ]
raw_html = unicode(tostring(doc.body or doc))
return clean_attributes(raw_html)


@@ -0,0 +1,513 @@
import re, sys
from collections import defaultdict
from lxml.etree import tostring
from lxml.html import (fragment_fromstring, document_fromstring,
tostring as htostring)
from calibre.ebooks.readability.htmls import build_doc, get_body, get_title, shorten_title
from calibre.ebooks.readability.cleaners import html_cleaner, clean_attributes
def tounicode(tree_or_node, **kwargs):
kwargs['encoding'] = unicode
return htostring(tree_or_node, **kwargs)
REGEXES = {
'unlikelyCandidatesRe': re.compile('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter',re.I),
'okMaybeItsACandidateRe': re.compile('and|article|body|column|main|shadow',re.I),
'positiveRe': re.compile('article|body|content|entry|hentry|main|page|pagination|post|text|blog|story',re.I),
'negativeRe': re.compile('combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget',re.I),
'divToPElementsRe': re.compile('<(a|blockquote|dl|div|img|ol|p|pre|table|ul)',re.I),
#'replaceBrsRe': re.compile('(<br[^>]*>[ \n\r\t]*){2,}',re.I),
#'replaceFontsRe': re.compile('<(\/?)font[^>]*>',re.I),
#'trimRe': re.compile('^\s+|\s+$/'),
#'normalizeRe': re.compile('\s{2,}/'),
#'killBreaksRe': re.compile('(<br\s*\/?>(\s|&nbsp;?)*){1,}/'),
#'videoRe': re.compile('http:\/\/(www\.)?(youtube|vimeo)\.com', re.I),
#skipFootnoteLink: /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
}
def describe(node, depth=1):
if not hasattr(node, 'tag'):
return "[%s]" % type(node)
name = node.tag
if node.get('id', ''): name += '#'+node.get('id')
if node.get('class', ''):
name += '.' + node.get('class').replace(' ','.')
if name[:4] in ['div#', 'div.']:
name = name[3:]
if depth and node.getparent() is not None:
return name+' - '+describe(node.getparent(), depth-1)
return name
def to_int(x):
if not x: return None
x = x.strip()
if x.endswith('px'):
return int(x[:-2])
if x.endswith('em'):
return int(x[:-2]) * 12
return int(x)
def clean(text):
text = re.sub('\s*\n\s*', '\n', text)
text = re.sub('[ \t]{2,}', ' ', text)
return text.strip()
def text_length(i):
return len(clean(i.text_content() or ""))
class Unparseable(ValueError):
pass
class Document:
TEXT_LENGTH_THRESHOLD = 25
RETRY_LENGTH = 250
def __init__(self, input, log, **options):
self.input = input
self.options = defaultdict(lambda: None)
for k, v in options.items():
self.options[k] = v
self.html = None
self.log = log
def _html(self, force=False):
if force or self.html is None:
self.html = self._parse(self.input)
return self.html
def _parse(self, input):
doc = build_doc(input)
doc = html_cleaner.clean_html(doc)
base_href = self.options['url']
if base_href:
doc.make_links_absolute(base_href, resolve_base_href=True)
else:
doc.resolve_base_href()
return doc
def content(self):
return get_body(self._html(True))
def title(self):
return get_title(self._html(True))
def short_title(self):
return shorten_title(self._html(True))
def summary(self):
try:
ruthless = True
while True:
self._html(True)
for i in self.tags(self.html, 'script', 'style'):
i.drop_tree()
for i in self.tags(self.html, 'body'):
i.set('id', 'readabilityBody')
if ruthless:
self.remove_unlikely_candidates()
self.transform_misused_divs_into_paragraphs()
candidates = self.score_paragraphs()
best_candidate = self.select_best_candidate(candidates)
if best_candidate:
article = self.get_article(candidates, best_candidate)
else:
if ruthless:
self.log.debug("ruthless removal did not work. ")
ruthless = False
self.debug("ended up stripping too much - going for a safer _parse")
# try again
continue
else:
self.log.debug("Ruthless and lenient parsing did not work. Returning raw html")
article = self.html.find('body')
if article is None:
article = self.html
cleaned_article = self.sanitize(article, candidates)
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
if ruthless and not of_acceptable_length:
ruthless = False
continue # try again
else:
return cleaned_article
except StandardError, e:
self.log.exception('error getting summary: ' )
raise Unparseable(str(e)), None, sys.exc_info()[2]
def get_article(self, candidates, best_candidate):
# Now that we have the top candidate, look through its siblings for content that might also be related.
# Things like preambles, content split by ads that we removed, etc.
sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
output = document_fromstring('<div/>')
parent = output.xpath('//div')[0]
best_elem = best_candidate['elem']
for sibling in best_elem.getparent().getchildren():
#if isinstance(sibling, NavigableString): continue#in lxml there no concept of simple text
append = False
if sibling is best_elem:
append = True
sibling_key = sibling #HashableElement(sibling)
if sibling_key in candidates and candidates[sibling_key]['content_score'] >= sibling_score_threshold:
append = True
if sibling.tag == "p":
link_density = self.get_link_density(sibling)
node_content = sibling.text or ""
node_length = len(node_content)
if node_length > 80 and link_density < 0.25:
append = True
elif node_length < 80 and link_density == 0 and re.search('\.( |$)', node_content):
append = True
if append:
parent.append(sibling)
#if output is not None:
# output.append(best_elem)
return output.find('body')
def select_best_candidate(self, candidates):
sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)
for candidate in sorted_candidates[:5]:
elem = candidate['elem']
self.debug("Top 5 : %6.3f %s" % (candidate['content_score'], describe(elem)))
if len(sorted_candidates) == 0:
return None
best_candidate = sorted_candidates[0]
return best_candidate
def get_link_density(self, elem):
link_length = 0
for i in elem.findall(".//a"):
link_length += text_length(i)
#if len(elem.findall(".//div") or elem.findall(".//p")):
# link_length = link_length
total_length = text_length(elem)
return float(link_length) / max(total_length, 1)
def score_paragraphs(self, ):
MIN_LEN = self.options.get('min_text_length', self.TEXT_LENGTH_THRESHOLD)
candidates = {}
#self.debug(str([describe(node) for node in self.tags(self.html, "div")]))
ordered = []
for elem in self.tags(self.html, "p", "pre", "td"):
parent_node = elem.getparent()
if parent_node is None:
continue
grand_parent_node = parent_node.getparent()
inner_text = clean(elem.text_content() or "")
inner_text_len = len(inner_text)
# If this paragraph is less than 25 characters, don't even count it.
if inner_text_len < MIN_LEN:
continue
if parent_node not in candidates:
candidates[parent_node] = self.score_node(parent_node)
ordered.append(parent_node)
if grand_parent_node is not None and grand_parent_node not in candidates:
candidates[grand_parent_node] = self.score_node(grand_parent_node)
ordered.append(grand_parent_node)
content_score = 1
content_score += len(inner_text.split(','))
content_score += min((inner_text_len / 100), 3)
#if elem not in candidates:
# candidates[elem] = self.score_node(elem)
#WTF? candidates[elem]['content_score'] += content_score
candidates[parent_node]['content_score'] += content_score
if grand_parent_node is not None:
candidates[grand_parent_node]['content_score'] += content_score / 2.0
# Scale the final candidates score based on link density. Good content should have a
# relatively small link density (5% or less) and be mostly unaffected by this operation.
for elem in ordered:
candidate = candidates[elem]
ld = self.get_link_density(elem)
score = candidate['content_score']
self.debug("Candid: %6.3f %s link density %.3f -> %6.3f" % (score, describe(elem), ld, score*(1-ld)))
candidate['content_score'] *= (1 - ld)
return candidates
def class_weight(self, e):
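# +/-25 points per attribute, depending on whether the class or id
# matches the positive or negative keyword regexes.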
weight = 0
if e.get('class', None):
if REGEXES['negativeRe'].search(e.get('class')):
weight -= 25
if REGEXES['positiveRe'].search(e.get('class')):
weight += 25
if e.get('id', None):
if REGEXES['negativeRe'].search(e.get('id')):
weight -= 25
if REGEXES['positiveRe'].search(e.get('id')):
weight += 25
return weight
def score_node(self, elem):
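# Base score from class/id weighting, adjusted by tag type: content
# containers (<div>, <pre>, <td>, <blockquote>) gain points; list, form
# and heading tags lose points.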
content_score = self.class_weight(elem)
name = elem.tag.lower()
if name == "div":
content_score += 5
elif name in ["pre", "td", "blockquote"]:
content_score += 3
elif name in ["address", "ol", "ul", "dl", "dd", "dt", "li", "form"]:
content_score -= 3
elif name in ["h1", "h2", "h3", "h4", "h5", "h6", "th"]:
content_score -= 5
return {
'content_score': content_score,
'elem': elem
}
def debug(self, *a):
#if self.options['debug']:
self.log.debug(*a)
def remove_unlikely_candidates(self):
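# Drop any element whose class/id looks like boilerplate (matches the
# unlikelyCandidates regex) unless it also matches the "maybe a candidate"
# regex or is the <body> itself.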
for elem in self.html.iter():
s = "%s %s" % (elem.get('class', ''), elem.get('id', ''))
#self.debug(s)
if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag != 'body':
self.debug("Removing unlikely candidate - %s" % describe(elem))
elem.drop_tree()
def transform_misused_divs_into_paragraphs(self):
for elem in self.tags(self.html, 'div'):
# transform <div>s that do not contain other block elements into <p>s
if not REGEXES['divToPElementsRe'].search(unicode(''.join(map(tostring, list(elem))))):
#self.debug("Altering %s to p" % (describe(elem)))
elem.tag = "p"
#print "Fixed element "+describe(elem)
for elem in self.tags(self.html, 'div'):
if elem.text and elem.text.strip():
p = fragment_fromstring('<p/>')
p.text = elem.text
elem.text = None
elem.insert(0, p)
#print "Appended "+tounicode(p)+" to "+describe(elem)
for pos, child in reversed(list(enumerate(elem))):
if child.tail and child.tail.strip():
p = fragment_fromstring('<p/>')
p.text = child.tail
child.tail = None
elem.insert(pos + 1, p)
#print "Inserted "+tounicode(p)+" to "+describe(elem)
if child.tag == 'br':
#print 'Dropped <br> at '+describe(elem)
child.drop_tree()
def tags(self, node, *tag_names):
for tag_name in tag_names:
for e in node.findall('.//%s' % tag_name):
yield e
def reverse_tags(self, node, *tag_names):
for tag_name in tag_names:
for e in reversed(node.findall('.//%s' % tag_name)):
yield e
def sanitize(self, node, candidates):
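# Final cleanup of the extracted article: drop headers that are link-heavy
# or negatively weighted, drop forms/iframes/textareas, then conditionally
# remove <table>/<ul>/<div> blocks whose mix of images, links, list items,
# inputs and text suggests boilerplate rather than content.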
MIN_LEN = self.options.get('min_text_length', self.TEXT_LENGTH_THRESHOLD)
for header in self.tags(node, "h1", "h2", "h3", "h4", "h5", "h6"):
if self.class_weight(header) < 0 or self.get_link_density(header) > 0.33:
header.drop_tree()
for elem in self.tags(node, "form", "iframe", "textarea"):
elem.drop_tree()
allowed = {}
# Conditionally clean <table>s, <ul>s, and <div>s
for el in self.reverse_tags(node, "table", "ul", "div"):
if el in allowed:
continue
weight = self.class_weight(el)
if el in candidates:
content_score = candidates[el]['content_score']
#print '!',el, '-> %6.3f' % content_score
else:
content_score = 0
tag = el.tag
if weight + content_score < 0:
self.debug("Cleaned %s with score %6.3f and weight %-3s" %
(describe(el), content_score, weight, ))
el.drop_tree()
elif el.text_content().count(",") < 10:
counts = {}
for kind in ['p', 'img', 'li', 'a', 'embed', 'input']:
counts[kind] = len(el.findall('.//%s' %kind))
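# Large negative offset so that only very long lists trigger the
# "more <li>s than <p>s" removal below.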
counts["li"] -= 100
content_length = text_length(el) # Count the text length excluding any surrounding whitespace
link_density = self.get_link_density(el)
parent_node = el.getparent()
if parent_node is not None:
if parent_node in candidates:
content_score = candidates[parent_node]['content_score']
else:
content_score = 0
#if parent_node is not None:
#pweight = self.class_weight(parent_node) + content_score
#pname = describe(parent_node)
#else:
#pweight = 0
#pname = "no parent"
to_remove = False
reason = ""
#if el.tag == 'div' and counts["img"] >= 1:
# continue
if counts["p"] and counts["img"] > counts["p"]:
reason = "too many images (%s)" % counts["img"]
to_remove = True
elif counts["li"] > counts["p"] and tag != "ul" and tag != "ol":
reason = "more <li>s than <p>s"
to_remove = True
elif counts["input"] > (counts["p"] / 3):
reason = "less than 3x <p>s than <input>s"
to_remove = True
elif content_length < (MIN_LEN) and (counts["img"] == 0 or counts["img"] > 2):
reason = "too short content length %s without a single image" % content_length
to_remove = True
elif weight < 25 and link_density > 0.2:
reason = "too many links %.3f for its weight %s" % (link_density, weight)
to_remove = True
elif weight >= 25 and link_density > 0.5:
reason = "too many links %.3f for its weight %s" % (link_density, weight)
to_remove = True
elif (counts["embed"] == 1 and content_length < 75) or counts["embed"] > 1:
reason = "<embed>s with too short content length, or too many <embed>s"
to_remove = True
# if el.tag == 'div' and counts['img'] >= 1 and to_remove:
# imgs = el.findall('.//img')
# valid_img = False
# self.debug(tounicode(el))
# for img in imgs:
#
# height = img.get('height')
# text_length = img.get('text_length')
# self.debug ("height %s text_length %s" %(repr(height), repr(text_length)))
# if to_int(height) >= 100 or to_int(text_length) >= 100:
# valid_img = True
# self.debug("valid image" + tounicode(img))
# break
# if valid_img:
# to_remove = False
# self.debug("Allowing %s" %el.text_content())
# for desnode in self.tags(el, "table", "ul", "div"):
# allowed[desnode] = True
#find x non empty preceding and succeeding siblings
i, j = 0, 0
x = 1
siblings = []
for sib in el.itersiblings():
#self.debug(sib.text_content())
sib_content_length = text_length(sib)
if sib_content_length:
i += 1
siblings.append(sib_content_length)
if i == x:
break
for sib in el.itersiblings(preceding=True):
#self.debug(sib.text_content())
sib_content_length = text_length(sib)
if sib_content_length:
j += 1
siblings.append(sib_content_length)
if j == x:
break
#self.debug(str(siblings))
if siblings and sum(siblings) > 1000 :
to_remove = False
self.debug("Allowing %s" % describe(el))
for desnode in self.tags(el, "table", "ul", "div"):
allowed[desnode] = True
if to_remove:
self.debug("Cleaned %6.3f %s with weight %s cause it has %s." %
(content_score, describe(el), weight, reason))
#print tounicode(el)
#self.debug("pname %s pweight %.3f" %(pname, pweight))
el.drop_tree()
for el in ([node] + [n for n in node.iter()]):
if not (self.options['attributes']):
#el.attrib = {} #FIXME: Check out the effects of disabling this
pass
return clean_attributes(tounicode(node))
class HashableElement():
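# Wrapper that makes an lxml element usable as a dict key by hashing its
# path of (tag, attributes, text) tuples up to the root; appears unused
# above, where plain elements serve as candidate keys.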
def __init__(self, node):
self.node = node
self._path = None
def _get_path(self):
if self._path is None:
reverse_path = []
node = self.node
while node is not None:
node_id = (node.tag, tuple(node.attrib.items()), node.text)
reverse_path.append(node_id)
node = node.getparent()
self._path = tuple(reverse_path)
return self._path
path = property(_get_path)
def __hash__(self):
return hash(self.path)
def __eq__(self, other):
return self.path == other.path
def __getattr__(self, tag):
return getattr(self.node, tag)
def main():
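# Minimal command-line driver: read HTML from a local file or, with -u,
# from a URL, run Document(...).summary() and print the result encoded
# for the current stdout.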
import logging
from optparse import OptionParser
parser = OptionParser(usage="%prog: [options] [file]")
parser.add_option('-v', '--verbose', action='store_true')
parser.add_option('-u', '--url', help="use URL instead of a local file")
(options, args) = parser.parse_args()
if not (len(args) == 1 or options.url):
parser.print_help()
sys.exit(1)
logging.basicConfig(level=logging.INFO)
file = None
if options.url:
import urllib
file = urllib.urlopen(options.url)
else:
file = open(args[0], 'rt')
enc = sys.__stdout__.encoding or 'utf-8'
try:
print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
finally:
file.close()
if __name__ == '__main__':
main()

View File

@ -335,6 +335,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
geom = gprefs.get('bulk_metadata_window_geometry', None)
if geom is not None:
self.restoreGeometry(bytes(geom))
self.languages.init_langs(self.db)
self.languages.setEditText('')
self.exec_()

Some files were not shown because too many files have changed in this diff.