This commit is contained in:
GRiker
2012-11-25 04:31:58 -07:00
367 changed files with 128113 additions and 85835 deletions
+42
View File
@@ -39,3 +39,45 @@ recipes/.git
recipes/.gitignore
recipes/README
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe
recipes/tv_discoveryscience.recipe
recipes/tv_foxlife.recipe
recipes/tv_fox.recipe
recipes/tv_hbo.recipe
recipes/tv_kinopolska.recipe
recipes/tv_nationalgeographic.recipe
recipes/tv_polsat2.recipe
recipes/tv_polsat.recipe
recipes/tv_tv4.recipe
recipes/tv_tvn7.recipe
recipes/tv_tvn.recipe
recipes/tv_tvp1.recipe
recipes/tv_tvp2.recipe
recipes/tv_tvphd.recipe
recipes/tv_tvphistoria.recipe
recipes/tv_tvpkultura.recipe
recipes/tv_tvppolonia.recipe
recipes/tv_tvpuls.recipe
recipes/tv_viasathistory.recipe
recipes/icons/tv_axnscifi.png
recipes/icons/tv_comedycentral.png
recipes/icons/tv_discoveryscience.png
recipes/icons/tv_foxlife.png
recipes/icons/tv_fox.png
recipes/icons/tv_hbo.png
recipes/icons/tv_kinopolska.png
recipes/icons/tv_nationalgeographic.png
recipes/icons/tv_polsat2.png
recipes/icons/tv_polsat.png
recipes/icons/tv_tv4.png
recipes/icons/tv_tvn7.png
recipes/icons/tv_tvn.png
recipes/icons/tv_tvp1.png
recipes/icons/tv_tvp2.png
recipes/icons/tv_tvphd.png
recipes/icons/tv_tvphistoria.png
recipes/icons/tv_tvpkultura.png
recipes/icons/tv_tvppolonia.png
recipes/icons/tv_tvpuls.png
recipes/icons/tv_viasathistory.png
+83
View File
@@ -19,6 +19,89 @@
# new recipes:
# - title:
- version: 0.9.7
date: 2012-11-23
new features:
- title: "Edit metadata dialog: Show the size of the current book cover in the edit metadata dialog."
tickets: [1079781]
- title: "Get Books: Allow easy searching by title and author in addition to any keyword, to prevent large numbers of spurious matches."
- title: "An option to automatically convert any added book to the current output format, found under Preferences->Adding books"
- title: "E-book viewer: Allow viewing tables in a separate popup window by right clicking on the table and selecting 'View table'. Useful for reference books that have lots of large tables."
tickets: [1080710]
- title: "Catalogs: Add the current library name as an available field when generating catalogs in csv/xml format."
tickets: [1078422]
- title: "Enable colored text in the output from the command line tools on windows"
- title: "E-book viewer: Add an option to hide the help message when entering full screen mode"
- title: "E-book viewer: Add an option to always start the viewer in full screen mode"
- title: "E-book viewer: Add many more controls to the context menu, particularly useful in full screen mode"
- title: "E-book viewer: Allow easy searching of the selected word or phrase in google via the context menu"
- title: "Add a new type of FileType plugin, postimport, that runs after a book has been added to the database."
- title: "Get Books: Remove Gandalf store, add Publio store. Update the Legimi store plugin for website changes"
bug fixes:
- title: "Conversion: Correctly handle values of left and right for the deprecated align attribute of images, mapping them to the CSS float property instead of to text-align."
tickets: [1081094]
- title: "MOBI Output: When generating joint MOBI6/KF8 files do not set incorrect display CSS values for tables in the KF8 part"
- title: "Connect to iTunes: Ignore AAC audio files."
tickets: [1081096]
- title: "E-book viewer: Fix restoring from fullscreen not respecting maximized window state"
- title: "Fix rows in the device books view sometimes being too high"
- title: "Catalogs: Fixed a problem occurring when merging comments with a custom field whose type is a list."
- title: "Linux binary: Use exec in the wrapper shell scripts that are used to set env vars and launch calibre utilities."
tickets: [1077884]
- title: "E-book viewer: Fix blank pages after every page when viewing some comic files in paged mode"
- title: "E-book viewer: When printing, respect the specified page range."
tickets: [1074220]
- title: "Font subsetting: Parse the GSUB table for glyph substitution rules and do not remove any glyphs that could act as substitutes. Keep zero length glyphs like the glyphs for non printable characters when subsetting TrueType outlines."
- title: "Smarten punctuation: Fix self closing script tags causing smarten punctuation to fail"
improved recipes:
- Arguments and facts
- Business Standard
- The New Yorker
new recipes:
- title: Various Czech and Hungarian news sources
author: bubak
- title: Various Polish recipes
author: Artur Stachecki
- title: Buchreport
author: a.peter
- title: Red Voltaire
author: atordo
- title: Autosport
author: Mr Stefan
- title: House News
author: Eddie Lau
- version: 0.9.6
date: 2012-11-10
+11 -7
View File
@@ -649,20 +649,24 @@ If it still wont launch, start a command prompt (press the windows key and R; th
Post any output you see in a help message on the `Forum <http://www.mobileread.com/forums/forumdisplay.php?f=166>`_.
|app| freezes when I click on anything?
|app| freezes/crashes occasionally?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are three possible things I know of, that can cause this:
* You recently connected an external monitor or TV to your computer. In this case, whenever |app| opens a new window like the edit metadata window or the conversion dialog, it appears on the second monitor where you dont notice it and so you think |app| has frozen. Disconnect your second monitor and restart calibre.
* You recently connected an external monitor or TV to your computer. In
this case, whenever |app| opens a new window like the edit metadata
window or the conversion dialog, it appears on the second monitor where
you don't notice it and so you think |app| has frozen. Disconnect your
second monitor and restart calibre.
* You are using a Wacom branded mouse. There is an incompatibility between Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom mouse.
* You are using a Wacom branded mouse. There is an incompatibility between
Wacom mice and the graphics toolkit |app| uses. Try using a non-Wacom
mouse.
* If you use RoboForm, it is known to cause |app| to crash. Add |app| to
the blacklist of programs inside RoboForm to fix this.
* Sometimes if some software has installed lots of new files in your fonts folder, |app| can crash until it finishes indexing them. Just start |app|, then leave it alone for about 20 minutes, without clicking on anything. After that you should be able to use |app| as normal.
the blacklist of programs inside RoboForm to fix this. Or uninstall
RoboForm.
|app| is not starting on OS X?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+13 -6
View File
@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2010 - 2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.aif.ru
'''
@@ -19,12 +19,19 @@ class AIF_ru(BasicNewsRecipe):
encoding = 'cp1251'
language = 'ru'
publication_type = 'magazine'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} '
keep_only_tags = [dict(name='div',attrs={'id':'inner'})]
masthead_url = 'http://static3.aif.ru/glossy/index/i/logo.png'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif}
img{display: block}
"""
keep_only_tags = [
dict(name='div',attrs={'class':['content-header', 'zoom']})
,dict(name='div',attrs={'id':'article-text'})
]
remove_tags = [
dict(name=['iframe','object','link','base','input','img'])
,dict(name='div',attrs={'class':'photo'})
,dict(name='p',attrs={'class':'resizefont'})
dict(name=['iframe','object','link','base','input','meta'])
,dict(name='div',attrs={'class':'in-topic'})
]
feeds = [(u'News', u'http://www.aif.ru/rss/all.php')]
+69
View File
@@ -0,0 +1,69 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class aktualneRecipe(BasicNewsRecipe):
    """Recipe for aktualne.cz, built from the aktualne.centrum.cz RSS feeds.

    An article URL that was already returned for one feed is rejected for
    every later feed, and page data is decoded by the ``encoding`` callable
    defined below, which picks the charset from the page URL.
    """
    __author__ = 'bubak'
    title = u'aktualne.cz'
    publisher = u'Centrum holdings'
    description = 'aktuálně.cz'
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        (u'Domácí', u'http://aktualne.centrum.cz/feeds/rss/domaci/?photo=0'),
        (u'Zprávy', u'http://aktualne.centrum.cz/feeds/rss/zpravy/?photo=0'),
        (u'Praha', u'http://aktualne.centrum.cz/feeds/rss/domaci/regiony/praha/?photo=0'),
        (u'Ekonomika', u'http://aktualne.centrum.cz/feeds/rss/ekonomika/?photo=0'),
        (u'Finance', u'http://aktualne.centrum.cz/feeds/rss/finance/?photo=0'),
        (u'Blogy a názory', u'http://blog.aktualne.centrum.cz/export-all.php')
    ]

    language = 'cs'
    cover_url = 'http://img.aktualne.centrum.cz/design/akt4/o/l/logo-akt-ciste.png'
    remove_javascript = True
    no_stylesheets = True

    remove_attributes = []
    remove_tags_before = dict(name='h1', attrs={'class':['titulek-clanku']})
    filter_regexps = [r'img.aktualne.centrum.cz']
    remove_tags = [dict(name='div', attrs={'id':['social-bookmark']}),
                   dict(name='div', attrs={'class':['box1', 'svazane-tagy']}),
                   dict(name='div', attrs={'class':'itemcomment id0'}),
                   dict(name='div', attrs={'class':'hlavicka'}),
                   dict(name='div', attrs={'class':'hlavni-menu'}),
                   dict(name='div', attrs={'class':'top-standard-brand-obal'}),
                   dict(name='div', attrs={'class':'breadcrumb'}),
                   dict(name='div', attrs={'id':'start-standard'}),
                   dict(name='div', attrs={'id':'forum'}),
                   dict(name='span', attrs={'class':'akce'}),
                   dict(name='span', attrs={'class':'odrazka vetsi'}),
                   dict(name='div', attrs={'class':'boxP'}),
                   dict(name='div', attrs={'class':'box2'})]
    # Both patterns cut the page off at the first trailing widget by
    # replacing everything from that <div> onwards with a closing </body>.
    preprocess_regexps = [
        (re.compile(r'<div class="(contenttitle"|socialni-site|wiki|facebook-promo|facebook-like-button"|meta-akce).*', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
        (re.compile(r'<div class="[^"]*poutak-clanek-trojka".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]

    keep_only_tags = []

    # URLs already accepted by get_article_url (used as a set; values are True).
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article URL, or None for duplicates and missing links.

        The base implementation's result is passed through unless it is
        None (nothing to download) or was already returned earlier.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if url is None:
            # Without this guard the string concatenations below raise
            # TypeError and None would be recorded as a visited URL.
            self.log.debug('Ignoring article without a URL')
            return None
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return url

    def encoding(self, source):
        """Decode ``source`` using a charset chosen from ``source.newurl``.

        URLs containing 'blog.aktualne' are decoded as utf-8, everything
        else as iso-8859-2; undecodable bytes are replaced.
        """
        if source.newurl.find('blog.aktualne') >= 0:
            enc = 'utf-8'
        else:
            enc = 'iso-8859-2'
        self.log.debug('Called encoding ' + enc + " " + str(source.newurl))
        return source.decode(enc, 'replace')
+48
View File
@@ -0,0 +1,48 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AntywebRecipe(BasicNewsRecipe):
    """Recipe for the Polish technology blog Antyweb.

    Keeps the article title and content blocks and flattens every link
    that wraps a single string into plain text.
    """
    # --- metadata -------------------------------------------------------
    title = u'Antyweb'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    __license__ = 'GPL v3'
    version = 1
    language = 'pl'
    category = u'News'
    description = u'Blog o internecie i nowych technologiach'
    cover_url = ''
    encoding = 'utf-8'

    # --- download behaviour ---------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 3
    use_embedded_content = False
    remove_empty_feeds = True
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True

    # --- page clean-up --------------------------------------------------
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'mm-article-title'}),
        dict(name='div', attrs={'class': 'mm-article-content'}),
    ]
    remove_tags = [
        dict(name='h2', attrs={'class': 'widgettitle'}),
        dict(name='img', attrs={'class': 'alignleft'}),
        # NOTE(review): the value below looks like an inline style, yet it
        # is matched against the class attribute - confirm the intent.
        dict(name='div', attrs={'class': 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}),
        dict(name='img', attrs={'src': 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}),
        dict(name='div', attrs={'class': 'podwpisowe'}),
    ]
    extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
'''

    feeds = [
        (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
    ]

    def preprocess_html(self, soup):
        """Replace each <a> whose ``string`` is set with that string."""
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                # Links with nested markup (or no text) are left untouched.
                continue
            anchor.replaceWith(label)
        return soup
+27
View File
@@ -0,0 +1,27 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
appfunds.blogspot.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class app_funds(BasicNewsRecipe):
    """Recipe for the APP Funds blog; relies on auto_cleanup for page clean-up."""
    # Metadata
    title = u'APP Funds'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description = 'Blog inwestora dla inwestorów i oszczędzających'
    INDEX = 'http://appfunds.blogspot.com'

    # Download limits
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    # Clean-up switches
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'blog', u'http://feeds.feedburner.com/blogspot/etVI'),
    ]
+30
View File
@@ -0,0 +1,30 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'MrStefan <mrstefaan@gmail.com>'
'''
www.autosport.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class autosport(BasicNewsRecipe):
    """Recipe for autosport.com news: headline, author, date and paragraphs."""
    # Metadata
    title = u'Autosport'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'en_GB'
    description = u'Daily Formula 1 and motorsport news from the leading weekly motor racing magazine. The authority on Formula 1, F1, MotoGP, GP2, Champ Car, Le Mans...'
    masthead_url = 'http://cdn.images.autosport.com/asdotcom.gif'

    # Download limits and clean-up switches
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # Only these elements survive; every <p> on the page is kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'news_headline'}),
        dict(name='td', attrs={'class': 'news_article_author'}),
        dict(name='td', attrs={'class': 'news_article_date'}),
        dict(name='p'),
    ]

    feeds = [
        (u'ALL NEWS', u'http://www.autosport.com/rss/allnews.xml'),
    ]
+50
View File
@@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
bankier.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class bankier(BasicNewsRecipe):
    """Recipe for the Polish finance portal Bankier.pl.

    Articles are downloaded through the site's print view, whose URL is
    built from the numeric id at the end of the article URL.
    """
    title = u'Bankier.pl'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description ='Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
    masthead_url='http://www.bankier.pl/gfx/hd-mid-02.gif'
    INDEX='http://bankier.pl/'
    remove_empty_feeds= True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript=True
    no_stylesheets=True
    simultaneous_downloads = 5

    keep_only_tags =[]
    keep_only_tags.append(dict(name = 'div', attrs = {'align' : 'left'}))

    remove_tags =[]
    remove_tags.append(dict(name = 'table', attrs = {'cellspacing' : '2'}))
    remove_tags.append(dict(name = 'div', attrs = {'align' : 'center'}))
    remove_tags.append(dict(name = 'img', attrs = {'src' : '/gfx/hd-mid-02.gif'}))
    #remove_tags.append(dict(name = 'a', attrs = {'target' : '_blank'}))
    #remove_tags.append(dict(name = 'br', attrs = {'clear' : 'all'}))

    feeds = [
        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at ``url``.

        The article id is the text between the last '-' and the file
        extension, e.g. '.../Some-title-2678512.html' gives '2678512'.
        """
        # Drop any query string, then the extension, then keep what follows
        # the last dash. Splitting the whole URL on '.' and taking a fixed
        # index only works when the host part has exactly two dots and the
        # slug has none.
        path = url.split('?', 1)[0]
        stem = path.rpartition('.')[0]
        article_id = stem.rpartition('-')[2]
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
+55
View File
@@ -0,0 +1,55 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class bleskRecipe(BasicNewsRecipe):
    """Recipe for the Czech tabloid blesk.cz.

    An article URL already returned for one feed is rejected for every
    later feed.
    """
    __author__ = 'bubak'
    title = u'Blesk'
    publisher = u''
    description = 'blesk.cz'
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False

    feeds = [
        (u'Zprávy', u'http://www.blesk.cz/rss/7'),
        (u'Blesk', u'http://www.blesk.cz/rss/1'),
        (u'Sex a tabu', u'http://www.blesk.cz/rss/2'),
        (u'Celebrity', u'http://www.blesk.cz/rss/5'),
        (u'Cestování', u'http://www.blesk.cz/rss/12')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://img.blesk.cz/images/blesk/blesk-logo.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
"""

    remove_attributes = []
    remove_tags_before = dict(name='div', attrs={'id':['boxContent']})
    remove_tags_after = dict(name='div', attrs={'class':['artAuthors']})
    remove_tags = [dict(name='div', attrs={'class':['link_clanek']}),
                   dict(name='div', attrs={'id':['partHeader']}),
                   dict(name='div', attrs={'id':['top_bottom_box', 'lista_top']})]
    # Cut the page off at the first tip/related box by replacing everything
    # from that <div> onwards with a closing </body>.
    preprocess_regexps = [(re.compile(r'<div class="(textovytip|related)".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]

    keep_only_tags = [dict(name='div', attrs={'class':'articleContent'})]

    # URLs already accepted by get_article_url (used as a set; values are True).
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article URL, or None for duplicates and missing links."""
        url = BasicNewsRecipe.get_article_url(self, article)
        if url is None:
            # Without this guard the string concatenations below raise
            # TypeError and None would be recorded as a visited URL.
            self.log.debug('Ignoring article without a URL')
            return None
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return url
+28
View File
@@ -0,0 +1,28 @@
from calibre.web.feeds.news import BasicNewsRecipe
class blognexto(BasicNewsRecipe):
    """Recipe for blog.nexto.pl: keeps the #content div minus social/comment boxes."""
    # Metadata
    title = 'BLOG.NEXTO.pl'
    __author__ = 'MrStefan <mrstefaan@gmail.com>'
    language = 'pl'
    description = 'o e-publikacjach prawie wszystko'
    masthead_url = 'http://blog.nexto.pl/wp-content/uploads/2012/04/logo-blog-nexto.pl_.jpg'

    # Download limits and clean-up switches
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'}),
    ]

    # Elements stripped from the kept content.
    remove_tags = [
        dict(name='div', attrs={'class': 'comment-cloud'}),
        dict(name='p', attrs={'class': 'post-date1'}),
        dict(name='div', attrs={'class': 'fb-like'}),
        dict(name='div', attrs={'class': 'tags'}),
        dict(name='div', attrs={'class': 'postnavi'}),
        dict(name='div', attrs={'class': 'commments-box'}),
        dict(name='div', attrs={'id': 'respond'}),
    ]

    feeds = [
        ('Artykuly', 'http://feeds.feedburner.com/blognexto'),
    ]
+140
View File
@@ -0,0 +1,140 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
import datetime, re
class brewiarz(BasicNewsRecipe):
    """Recipe for brewiarz.pl (Liturgy of the Hours).

    There is no RSS feed: ``parse_index`` builds the URL of each day's
    index page from the date, and ``parse_pages`` collects the links listed
    under the 'Teksty LG' box, rewritten to the site's print view.
    """
    title = u'Brewiarz'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Serwis poświęcony Liturgii Godzin (brewiarzowi) - formie codziennej modlitwy Kościoła katolickiego.'
    masthead_url = 'http://brewiarz.pl/images/logo2.gif'
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    publication_type = 'newspaper'
    # Number of days to download, starting with today.
    next_days = 1

    # Lower-case Roman numerals used in the month directories, January first.
    _ROMAN_MONTHS = ('i', 'ii', 'iii', 'iv', 'v', 'vi',
                     'vii', 'viii', 'ix', 'x', 'xi', 'xii')
    # Polish weekday names indexed by datetime.weekday() (Monday == 0).
    # Unicode literals so they can be joined with link text taken from pages.
    _WEEKDAYS_PL = (u'Poniedziałek', u'Wtorek', u'Środa', u'Czwartek',
                    u'Piątek', u'Sobota', u'Niedziela')

    def parse_index(self):
        """Return a list of (feed title, articles) tuples, one or more per day.

        When a day's index page yields no articles directly, each link
        returned by ``get_sectors`` is parsed as its own feed.
        """
        now = datetime.datetime.now()
        feeds = []
        for offset in range(0, self.next_days):
            day = now + datetime.timedelta(days=offset)
            dd = day.strftime("%d")
            mm = day.strftime("%m")
            yy = day.strftime("%Y")[2:]
            # Index the tables with numeric date fields; strftime('%A')
            # depends on the process locale and would miss an English-keyed
            # table under any other locale.
            roman = self._ROMAN_MONTHS[day.month - 1]
            weekday_pl = self._WEEKDAYS_PL[day.weekday()]

            day_url = "http://brewiarz.pl/" + roman + "_" + yy + "/" + dd + mm + "/"
            day_title = weekday_pl + " " + dd + "." + mm + "." + yy
            index_url = day_url + "index.php3"

            articles = self.parse_pages(index_url)
            if articles:
                feeds.append((day_title, articles))
                continue

            for subpage in self.get_sectors(index_url):
                # .string is None when the link holds nested markup.
                label = subpage.string or self.tag_to_string(subpage)
                sub_url = day_url + subpage['href']
                self.log.debug(sub_url)
                articles = self.parse_pages(sub_url)
                if articles:
                    feeds.append((day_title + " - " + label, articles))
        return feeds

    def get_sectors(self, url):
        """Return the <a> tags found in the width-490 table of the page at ``url``.

        An empty list is returned when the page has no such table.
        """
        sectors = []
        soup = self.index_to_soup(url)
        sectors_table = soup.find(name='table', attrs={'width': '490'})
        if sectors_table is None:
            return sectors
        for link in sectors_table.findAll(name='a'):
            parent_div = link.findParent(name='div')
            parent_text = parent_div.text if parent_div is not None else None
            if parent_text:
                # The value is already the parent's text; a second .text
                # lookup on it was an error.
                link.text = parent_text
            sectors.append(link)
        return sectors

    def _print_url(self, page_url, href):
        """Return the print-view URL for ``href`` relative to ``page_url``."""
        # Resolve against the directory of the current page. Chopping a
        # fixed 10 characters is only right when the page is 'index.php3'.
        base = page_url.rpartition('/')[0] + '/'
        return base + re.sub('php3', 'php3?kr=_druk&wr=lg&', href)

    def parse_pages(self, url):
        """Return article dicts for the page at ``url``.

        Returns None when the page has no element of class 'www', and an
        empty list when the 'Teksty LG' box or its link list is missing.
        """
        current_articles = []
        soup = self.index_to_soup(url)
        www = soup.find(attrs={'class': 'www'})
        if not www:
            return None

        box_title = www.find(text='Teksty LG')
        article_box_parent = box_title.findParent('ul') if box_title is not None else None
        article_box_sibling = (article_box_parent.findNextSibling('ul')
                               if article_box_parent is not None else None)
        if article_box_sibling is None:
            return current_articles

        for li in article_box_sibling.findAll('li'):
            link = li.find(name='a')
            if link is None:
                continue
            ol = link.findNextSibling(name='ol')
            if ol:
                # A link followed by a sub-list: emit one article per
                # sub-link, titled "<link> - <sub-link>".
                for sublink in ol.findAll(name='a'):
                    current_articles.append({
                        'title': self.tag_to_string(link) + " - " + self.tag_to_string(sublink),
                        'url': self._print_url(url, sublink['href']),
                        'description': '', 'date': ''})
            elif not link.findParent(name='ol'):
                # Links nested inside an <ol> were already emitted through
                # their parent entry above, so only top-level ones go here.
                current_articles.append({
                    'title': self.tag_to_string(link),
                    'url': self._print_url(url, link['href']),
                    'description': '', 'date': ''})
        return current_articles

    def preprocess_html(self, soup):
        """Remove the site header, footer and colour note, then flatten markup.

        Each element is removed only if present, so a page with a different
        layout does not abort the download.
        """
        footer = soup.find(name='a', attrs={'href': 'http://brewiarz.pl'})
        header = soup.find(text='http://brewiarz.pl')
        subheader = soup.find(text='Kolor szat:')
        for marker in (footer, header, subheader):
            wrapper = marker.findParent('div') if marker is not None else None
            if wrapper is not None:
                wrapper.extract()
        color = soup.find('b')
        if color is not None:
            color.extract()
        cleaned = self.strip_tags(soup)
        # Drop the 2nd to 4th <div> of the flattened document, if present.
        for extra in cleaned.findAll(name='div')[1:4]:
            extra.extract()
        return cleaned

    def strip_tags(self, soup_dirty):
        """Unwrap every tag not in the allowed list, keeping its children."""
        VALID_TAGS = ['p', 'div', 'br', 'b', 'a', 'title', 'head', 'html', 'body']
        for tag in soup_dirty.findAll(True):
            if tag.name in VALID_TAGS:
                continue
            # Find this exact node among its siblings by identity; '==' is
            # structural and can match an identical earlier sibling.
            for i, x in enumerate(tag.parent.contents):
                if x is tag:
                    break
            else:
                self.log.debug("Can't find", tag, "in", tag.parent)
                continue
            # Insert the children at the tag's position in reverse, so they
            # end up in their original order, then remove the empty tag.
            for r in reversed(tag.contents):
                tag.parent.insert(i, r)
            tag.extract()
        return soup_dirty
+45
View File
@@ -0,0 +1,45 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Buchreport to an ebook.'''
class Buchreport(BasicNewsRecipe) :
    """Recipe for the Buchreport RSS feed; images and iframes are removed."""
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    description = 'Buchreport'
    version = 4
    title = u'Buchreport'
    timefmt = ' [%d.%m.%Y]'
    encoding = 'cp1252'
    language = 'de'

    # 'regular' is not a CSS font-weight value, so h3 uses 'normal' to make
    # the declaration take effect.
    extra_css = ('body { margin-left: 0.00em; margin-right: 0.00em; } '
                 'article, articledate, articledescription { text-align: left; } '
                 'h1 { text-align: left; font-size: 140%; font-weight: bold; } '
                 'h2 { text-align: left; font-size: 100%; font-weight: bold; font-style: italic; } '
                 'h3 { text-align: left; font-size: 100%; font-weight: normal; font-style: italic; } '
                 'h4, h5, h6 { text-align: left; font-size: 100%; font-weight: bold; }')

    oldest_article = 7.0
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    remove_tags_before = dict(name='h2')
    # The same div marks the end of the article and is itself removed below.
    remove_tags_after = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]})
    ]
    remove_tags = [
        dict(name='div', attrs={'style':["padding-top:10px;clear:both"]}),
        dict(name='iframe'),
        dict(name='img')
    ]

    feeds = [
        (u'Buchreport', u'http://www.buchreport.de/index.php?id=5&type=100')
    ]

    def get_masthead_url(self):
        """Return the fixed URL of the Buchreport logo."""
        return 'http://www.buchreport.de/fileadmin/template/img/buchreport_logo.jpg'
+9 -2
View File
@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.business-standard.com
'''
@@ -14,10 +14,12 @@ class BusinessStandard(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
auto_cleanup = False
encoding = 'cp1252'
publisher = 'Business Standard Limited'
category = 'news, business, money, india, world'
language = 'en_IN'
masthead_url = 'http://feeds.business-standard.com/images/logo_08.jpg'
conversion_options = {
'comments' : description
@@ -26,7 +28,7 @@ class BusinessStandard(BasicNewsRecipe):
,'publisher' : publisher
,'linearize_tables': True
}
keep_only_tags=[dict(attrs={'class':'TableClas'})]
#keep_only_tags=[dict(name='td', attrs={'class':'TableClas'})]
remove_tags = [
dict(name=['object','link','script','iframe','base','meta'])
,dict(attrs={'class':'rightDiv2'})
@@ -45,3 +47,8 @@ class BusinessStandard(BasicNewsRecipe):
,(u'Management & Mktg' , u'http://feeds.business-standard.com/rss/7_0.xml' )
,(u'Opinion' , u'http://feeds.business-standard.com/rss/5_0.xml' )
]
def print_version(self, url):
    """Build the print-page URL from the last two '/'-separated parts of ``url``.

    The final part becomes the ``tp`` parameter and the part before it the
    ``autono`` parameter; either is empty when ``url`` has too few slashes.
    """
    head, _, last_part = url.rpartition('/')
    second_last = head.rpartition('/')[2]
    prefix = 'http://www.business-standard.com/india/printpage.php?autono='
    return prefix + second_last + '&tp=' + last_part
+68
View File
@@ -0,0 +1,68 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class ceskaPoziceRecipe(BasicNewsRecipe):
    """Recipe for ceskapozice.cz.

    Duplicate article URLs across feeds are skipped, and multi-page
    articles are stitched together by following the pager's next link.
    """
    __author__ = 'bubak'
    title = u'Česká pozice'
    description = 'Česká pozice'
    oldest_article = 2
    max_articles_per_feed = 20

    feeds = [
        (u'Všechny články', u'http://www.ceskapozice.cz/rss.xml'),
        (u'Domov', u'http://www.ceskapozice.cz/taxonomy/term/16/feed'),
        (u'Chrono', u'http://www.ceskapozice.cz/chrono/feed'),
        (u'Evropa', u'http://www.ceskapozice.cz/taxonomy/term/17/feed')
    ]

    language = 'cs'
    cover_url = 'http://www.ceskapozice.cz/sites/default/files/cpozice_logo.png'
    remove_javascript = True
    no_stylesheets = True
    # Prefix for the relative href of the pager's next link.
    domain = u'http://www.ceskapozice.cz'
    use_embedded_content = False

    remove_tags = [dict(name='div', attrs={'class':['block-ad', 'region region-content-ad']}),
                   dict(name='ul', attrs={'class':'links'}),
                   dict(name='div', attrs={'id':['comments', 'back-to-top']}),
                   dict(name='div', attrs={'class':['next-page', 'region region-content-ad']}),
                   dict(name='cite')]

    keep_only_tags = [dict(name='div', attrs={'id':'content'})]

    # URLs already accepted by get_article_url (used as a set; values are True).
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article URL, or None for duplicates and missing links."""
        url = BasicNewsRecipe.get_article_url(self, article)
        if url is None:
            # Without this guard the string concatenations below raise
            # TypeError and None would be recorded as a visited URL.
            self.log.debug('Ignoring article without a URL')
            return None
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return url

    def preprocess_html(self, soup):
        """Append any follow-up pages to the body at position 3."""
        self.append_page(soup, soup.body, 3)
        return soup

    def append_page(self, soup, appendtag, position):
        """Fetch the next page named by ``soup``'s pager and insert its text.

        The next page's 'main-body' div, with ads and <cite> elements
        removed and its own follow-up pages appended recursively, is
        inserted into ``appendtag`` at ``position``. The pager is removed
        from ``soup`` afterwards. Nothing happens when there is no pager.
        """
        pager = soup.find('div', attrs={'class':'paging-bottom'})
        if not pager:
            return
        nextbutton = pager.find('li', attrs={'class':'pager-next'})
        next_link = nextbutton.a if nextbutton else None
        # A pager on the last page may have no next link; skip rather than
        # fail on the missing element.
        if next_link is not None and next_link.get('href'):
            nexturl = self.domain + next_link['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':'main-body'})
            if texttag is not None:
                for it in texttag.findAll('div', attrs={'class':'region region-content-ad'}):
                    it.extract()
                for it in texttag.findAll('cite'):
                    it.extract()
                # Later pages go at the end of this page's text.
                newpos = len(texttag.contents)
                self.append_page(soup2, texttag, newpos)
                texttag.extract()
                appendtag.insert(position, texttag)
        pager.extract()
+30
View File
@@ -0,0 +1,30 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class ceskenovinyRecipe(BasicNewsRecipe):
    """Recipe for ceskenoviny.cz; only the domestic-news feed is enabled."""
    # Metadata
    title = 'České Noviny'
    __author__ = 'bubak'
    description = 'ceskenoviny.cz'
    language = 'cs'
    cover_url = 'http://i4.cn.cz/grafika/cn_logo-print.gif'

    # Download limits
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        ('Domácí', 'http://www.ceskenoviny.cz/sluzby/rss/domov.php')
        #,(u'Hlavní události', u'http://www.ceskenoviny.cz/sluzby/rss/index.php')
        #,(u'Přehled zpráv', u'http://www.ceskenoviny.cz/sluzby/rss/zpravy.php')
        #,(u'Ze světa', u'http://www.ceskenoviny.cz/sluzby/rss/svet.php')
        #,(u'Kultura', u'http://www.ceskenoviny.cz/sluzby/rss/kultura.php')
        #,(u'IT', u'http://www.ceskenoviny.cz/sluzby/rss/pocitace.php')
    ]

    # Clean-up
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = []
    filter_regexps = [r'img.aktualne.centrum.cz']
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'clnk'}}]
+26
View File
@@ -0,0 +1,26 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class cro6Recipe(BasicNewsRecipe):
    """Recipe for the Czech Radio 6 export feed; keeps only the #article div."""
    # Metadata
    title = 'Český rozhlas 6'
    __author__ = 'bubak'
    description = 'Český rozhlas 6'
    language = 'cs'
    cover_url = 'http://www.rozhlas.cz/img/e5/logo/cro6.png'

    # Download limits
    oldest_article = 1
    max_articles_per_feed = 20

    feeds = [
        ('Český rozhlas 6', 'http://www.rozhlas.cz/export/cro6/'),
    ]

    # Clean-up
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = []
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'article'}}]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['audio-play-all', 'poradHeaders', 'actions']}},
        {'name': 'p', 'attrs': {'class': ['para-last']}},
    ]
+39
View File
@@ -0,0 +1,39 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class demagogRecipe(BasicNewsRecipe):
    """Recipe for the Czech fact-checking site demagog.cz.

    A horizontal rule is inserted right after the opening tag of every
    'vyrok_suhrn' div.
    """
    __author__ = 'bubak'
    title = u'Demagog.cz'
    publisher = u''
    description = 'demagog.cz'
    oldest_article = 6
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        (u'Aktuality', u'http://demagog.cz/rss')
    ]

    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://demagog.cz/content/images/demagog.cz.png'
    remove_javascript = True
    no_stylesheets = True
    extra_css = """
.vyrok_suhrn{margin-top:50px; }
.vyrok{margin-bottom:30px; }
"""

    remove_tags = [dict(name='a', attrs={'class':'vyrok_odovodnenie_tgl'}),
                   dict(name='img', attrs={'class':'vyrok_fotografia'})]

    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class':'vyrok_text_after'})

    # The replacement is a callable, so its return value is used verbatim
    # and backreference escapes are not expanded (the non-raw '\1' is also
    # just the control character U+0001). Take the matched opening tag
    # from the match object instead, so the <div> is preserved.
    preprocess_regexps = [(re.compile(r'(<div class="vyrok_suhrn">)', re.DOTALL|re.IGNORECASE), lambda match: match.group(1) + '<hr>')]
+36
View File
@@ -0,0 +1,36 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class ceskyDenikRecipe(BasicNewsRecipe):
    """Recipe for denik.cz; keeps the 'content' div up to the author line."""
    # Metadata
    title = 'denik.cz'
    __author__ = 'bubak'
    publisher = ''
    description = 'Český deník'
    #encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://g.denik.cz/images/loga/denik.png'

    # Download limits
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True

    feeds = [
        ('Z domova', 'http://www.denik.cz/rss/z_domova.html')
        ,('Pražský deník - Moje Praha', 'http://prazsky.denik.cz/rss/zpravy_region.html')
        #,(u'Zahraničí', u'http://www.denik.cz/rss/ze_sveta.html')
        #,(u'Kultura', u'http://www.denik.cz/rss/kultura.html')
    ]

    # Clean-up
    no_stylesheets = True
    remove_javascript = True
    extra_css = """
"""
    remove_tags = []
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'content'}}]
    #remove_tags_before = dict(name='h1')
    remove_tags_after = {'name': 'p', 'attrs': {'class': 'clanek-autor'}}
+28
View File
@@ -0,0 +1,28 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class denikReferendumRecipe(BasicNewsRecipe):
    """Recipe for the Denik Referendum feed; keeps #content minus side boxes."""
    # Metadata
    title = u'Den\u00edk Referendum'
    __author__ = 'bubak'
    publisher = ''
    description = ''
    #encoding = 'iso-8859-2'
    language = 'cs'

    # Download limits
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False

    feeds = [
        ('Deník Referendum', 'http://feeds.feedburner.com/DenikReferendum'),
    ]

    # Clean-up
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = []
    keep_only_tags = [{'name': 'div', 'attrs': {'id': ['content']}}]
    remove_tags_after = {'name': 'div', 'attrs': {'class': ['text']}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['box boxLine', 'box noprint', 'box']}},
        {'name': 'h3', 'attrs': {'class': 'head alt'}},
    ]
+1 -2
View File
@@ -6,7 +6,6 @@ class Dobreprogramy_pl(BasicNewsRecipe):
__author__ = 'fenuks'
__licence__ ='GPL v3'
category = 'IT'
language = 'pl'
masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
description = u'Aktualności i blogi z dobreprogramy.pl'
@@ -29,4 +28,4 @@ class Dobreprogramy_pl(BasicNewsRecipe):
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup
return soup
+1
View File
@@ -7,6 +7,7 @@ class AdvancedUserRecipe1332847053(BasicNewsRecipe):
title = u'Editoriali'
__author__ = 'faber1971'
description = 'Leading articles on Italy by the best Italian editorials'
language = 'it'
oldest_article = 1
max_articles_per_feed = 100
+35
View File
@@ -0,0 +1,35 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class f1ultra(BasicNewsRecipe):
    """Recipe for the Polish Formula 1 site f1ultra.pl (single RSS feed)."""

    title = u'Formuła 1 - F1 ultra'
    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description =u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    masthead_url='http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'

    remove_empty_feeds= True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript=True
    no_stylesheets=True

    # Only the main content container is kept; everything after the element
    # carrying the inline style below is cut off.
    keep_only_tags =[(dict(name = 'div', attrs = {'id' : 'main'}))]
    remove_tags_after =[dict(attrs = {'style' : 'margin-top:5px;margin-bottom:5px;display: inline;'})]

    # Page furniture stripped from each article.
    remove_tags = [
        dict(attrs = {'class' : ['buttonheading', 'avPlayerContainer', 'createdate']}),
        dict(attrs = {'title' : ['PDF', 'Drukuj', 'Email']}),
        dict(name = 'form', attrs = {'method' : 'post'}),
        dict(name = 'hr', attrs = {'size' : '2'}),
    ]

    # Raw-HTML substitutions applied before parsing: drop alignment and width
    # attributes and the opening <table> tags.
    preprocess_regexps = [
        (re.compile(r'align="left"'), lambda match: ''),
        (re.compile(r'align="right"'), lambda match: ''),
        # The previous pattern (width=\"*\") only matched 'width="' or
        # 'width=""', leaving the value and closing quote behind. Match the
        # complete attribute instead.
        (re.compile(r'width="[^"]*"'), lambda match: ''),
        (re.compile(r'\<table .*?\>'), lambda match: ''),
    ]

    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
img { display: block; clear: both;}
'''
    remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align']

    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]
+1
View File
@@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1349086293(BasicNewsRecipe):
title = u'Foreign Policy'
language = 'en'
__author__ = 'Darko Miletic'
description = 'International News'
publisher = 'Washingtonpost.Newsweek Interactive, LLC'
+67 -18
View File
@@ -1,39 +1,88 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
__copyright__ = u'2010-2012, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
fronda.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
from datetime import timedelta, date
class Fronda(BasicNewsRecipe):
title = u'Fronda.pl'
publisher = u'Fronda.pl'
description = u'Portal po\u015bwi\u0119cony - Infformacje'
description = u'Portal po\u015bwi\u0119cony - Informacje'
language = 'pl'
__author__ = u'Tomasz D\u0142ugosz'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
feeds = [(u'Infformacje', u'http://fronda.pl/news/feed')]
extra_css = '''
h1 {font-size:150%}
.body {text-align:left;}
div.headline {font-weight:bold}
'''
keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}),
dict(name='div', attrs={'class':'naglowek_tresc'}),
dict(name='div', attrs={'id':'czytaj'}) ]
earliest_date = date.today() - timedelta(days=oldest_article)
remove_tags = [dict(name='a', attrs={'class':'print'})]
def date_cut(self,datestr):
    """Tell whether an article date is older than ``self.earliest_date``.

    ``datestr`` looks like ``5.11.2012, 12:07``. Only the ``day.month.year``
    part in front of the comma is used. Returns True when that date lies
    before ``self.earliest_date``, False otherwise.
    """
    fields = datestr.split(',')[0].split('.')
    published = date(int(fields[2]), int(fields[1]), int(fields[0]))
    return published < self.earliest_date
preprocess_regexps = [
(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[ (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
(r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
(r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
(r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
(r'<p[^>]*>&nbsp;</p>', lambda match: ''),
(r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
(r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
]
def parse_index(self):
    """Build the feed list by scraping each category page of fronda.pl.

    For every category the page ``http://www.fronda.pl/c/<page>`` is loaded
    and each ``<li>`` containing an ``<h2>`` is treated as an article teaser.
    Teasers older than the cutoff (see ``date_cut``) are dropped.

    Returns a list of ``(category title, [article dict, ...])`` tuples, where
    each article dict has the keys ``title``, ``url`` and ``date``.
    """
    genres = [
        ('ekonomia,4.html', 'Ekonomia'),
        ('filozofia,15.html', 'Filozofia'),
        ('historia,6.html', 'Historia'),
        ('kosciol,8.html', 'Kościół'),
        ('kultura,5.html', 'Kultura'),
        ('media,10.html', 'Media'),
        ('nauka,9.html', 'Nauka'),
        ('polityka,11.html', 'Polityka'),
        ('polska,12.html', 'Polska'),
        ('prolife,3.html', 'Prolife'),
        ('religia,7.html', 'Religia'),
        ('rodzina,13.html', 'Rodzina'),
        ('swiat,14.html', 'Świat'),
        ('wydarzenie,16.html', 'Wydarzenie')
    ]
    feeds = []
    for url, genName in genres:
        soup = self.index_to_soup('http://www.fronda.pl/c/'+ url)
        section = []
        for item in soup.findAll('li'):
            article_h = item.find('h2')
            if not article_h:
                continue
            article_date = self.tag_to_string(item.find('b'))
            try:
                too_old = self.date_cut(article_date)
            except (ValueError, IndexError):
                # No date, or not in the expected "d.m.yyyy, hh:mm" form.
                # Skip this teaser instead of aborting the whole index.
                continue
            if too_old:
                continue
            article_a = article_h.find('a')
            if article_a is None or not article_a.get('href'):
                # Heading without a usable link: nothing to download.
                continue
            article_url = 'http://www.fronda.pl' + article_a['href']
            article_title = self.tag_to_string(article_a)
            section.append( { 'title' : article_title, 'url' : article_url, 'date' : article_date })
        feeds.append((genName, section))
    return feeds
keep_only_tags = [
dict(name='div', attrs={'class':'yui-g'})
]
remove_tags = [
dict(name='div', attrs={'class':['related-articles',
'button right',
'pagination']}),
dict(name='h3', attrs={'class':'block-header article comments'}),
dict(name='ul', attrs={'class':'comment-list'}),
dict(name='ul', attrs={'class':'category'}),
dict(name='p', attrs={'id':'comments-disclaimer'}),
dict(name='div', attrs={'id':'comment-form'})
]
+102
View File
@@ -0,0 +1,102 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
'''
krakow.gazeta.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class gw_krakow(BasicNewsRecipe):
    """Recipe for the Krakow section of Gazeta.pl (krakow.gazeta.pl).

    Articles come from one RSS feed. Multi-page articles and photo galleries
    are stitched together in ``preprocess_html``.
    """

    title = u'Gazeta.pl Kraków'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description =u'Wiadomości z Krakowa na portalu Gazeta.pl.'
    category='newspaper'
    publication_type = 'newspaper'
    masthead_url='http://bi.gazeta.pl/im/5/8528/m8528105.gif'
    INDEX='http://krakow.gazeta.pl/'
    remove_empty_feeds= True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript=True
    no_stylesheets=True

    keep_only_tags = [
        dict(name = 'div', attrs = {'id' : 'gazeta_article'}),
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'id' : 'gazeta_article_likes'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_tools'}),
        dict(name = 'div', attrs = {'id' : 'rel'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_share'}),
        # Was name='u1' (digit one), which is not an HTML element; the
        # toolbar holding the <li> entries below is a <ul>.
        dict(name = 'ul', attrs = {'id' : 'articleToolbar'}),
        dict(name = 'li', attrs = {'class' : 'atComments'}),
        dict(name = 'li', attrs = {'class' : 'atLicense'}),
        dict(name = 'div', attrs = {'id' : 'banP4'}),
        dict(name = 'div', attrs = {'id' : 'article_toolbar'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_tags'}),
        dict(name = 'p', attrs = {'class' : 'txt_upl'}),
        dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}),
        dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'}),
    ]

    remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})]

    feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]

    def skip_ad_pages(self, soup):
        """If the page has an ``<a class="btn">`` link, return the raw page it points to.

        Returns None (implicitly) when there is no such link.
        """
        tag=soup.find(name='a', attrs={'class':'btn'})
        if tag:
            new_soup=self.index_to_soup(tag['href'], raw=True)
            return new_soup

    def append_page(self, soup, appendtag):
        """Append the following pages of a multi-page article to ``appendtag``.

        The pager is the ``<div id="Str">`` element. Every pager link whose
        text contains u'następne' is fetched relative to ``INDEX`` and its
        ``id="artykul"`` element is appended. The pager and the ``id="source"``
        element are removed from ``appendtag``.
        """
        loop=False
        nexturl = []
        tag = soup.find('div', attrs={'id':'Str'})
        pager = appendtag.find('div', attrs={'id':'Str'})
        if pager:
            # ``tag`` comes from ``soup`` while the check is on ``appendtag``;
            # guard in case only the latter has a pager.
            if tag is not None:
                nexturl=tag.findAll('a')
            pager.extract()
            loop=True
        source = appendtag.find(id='source')
        if source:
            source.extract()
        while loop:
            loop=False
            for link in nexturl:
                # link.string is None for links with nested markup.
                if link.string and u'następne' in link.string:
                    url= self.INDEX + link['href']
                    soup2 = self.index_to_soup(url)
                    pagetext = soup2.find(id='artykul')
                    if pagetext is not None:
                        pos = len(appendtag.contents)
                        appendtag.insert(pos, pagetext)
                    tag = soup2.find('div', attrs={'id':'Str'})
                    # No pager on the fetched page means it was the last one.
                    nexturl=tag.findAll('a') if tag is not None else []
                    loop=True

    def gallery_article(self, appendtag):
        """Append the remaining pages of a photo gallery to ``appendtag``.

        Follows the link inside ``id="gal_btn_next"`` page by page, appending
        each page's ``id="container_gal"`` element and removing the
        ``id="gal_navi"`` navigation.
        """
        tag=appendtag.find(id='container_gal')
        if not tag:
            return
        nexturl = self._gallery_next_url(appendtag)
        navi = appendtag.find(id='gal_navi')
        if navi:
            navi.extract()
        while nexturl:
            soup2=self.index_to_soup(nexturl)
            pagetext=soup2.find(id='container_gal')
            if pagetext is None:
                # Not a gallery page: nothing to append or follow.
                break
            nexturl = self._gallery_next_url(pagetext)
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
            rem=appendtag.find(id='gal_navi')
            if rem:
                rem.extract()

    def _gallery_next_url(self, container):
        """Return the href of the 'next' gallery link in ``container``, or None."""
        button = container.find(id='gal_btn_next')
        if button is None or button.a is None:
            return None
        return button.a.get('href')

    def preprocess_html(self, soup):
        """Merge multi-page articles and galleries into one document."""
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup
+99
View File
@@ -0,0 +1,99 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
'''
warszawa.gazeta.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class gw_wawa(BasicNewsRecipe):
    """Recipe for the Warsaw section of Gazeta.pl (warszawa.gazeta.pl).

    Articles come from one RSS feed. Multi-page articles and photo galleries
    are stitched together in ``preprocess_html``.
    """

    title = u'Gazeta.pl Warszawa'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language = 'pl'
    description ='Wiadomości z Warszawy na portalu Gazeta.pl.'
    category='newspaper'
    publication_type = 'newspaper'
    masthead_url='http://bi.gazeta.pl/im/3/4089/m4089863.gif'
    INDEX='http://warszawa.gazeta.pl/'
    remove_empty_feeds= True
    oldest_article = 1
    max_articles_per_feed = 100
    remove_javascript=True
    no_stylesheets=True

    keep_only_tags = [
        dict(name = 'div', attrs = {'id' : 'gazeta_article'}),
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'id' : 'gazeta_article_likes'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_tools'}),
        dict(name = 'div', attrs = {'id' : 'rel'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_share'}),
        # Was name='u1' (digit one), which is not an HTML element; the
        # toolbar holding the <li> entries below is a <ul>.
        dict(name = 'ul', attrs = {'id' : 'articleToolbar'}),
        dict(name = 'li', attrs = {'class' : 'atComments'}),
        dict(name = 'li', attrs = {'class' : 'atLicense'}),
        dict(name = 'div', attrs = {'id' : 'banP4'}),
        dict(name = 'div', attrs = {'id' : 'article_toolbar'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_tags'}),
        dict(name = 'p', attrs = {'class' : 'txt_upl'}),
        dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}),
        dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}),
        dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}),
    ]

    feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]

    def skip_ad_pages(self, soup):
        """If the page has an ``<a class="btn">`` link, return the raw page it points to.

        Returns None (implicitly) when there is no such link.
        """
        tag=soup.find(name='a', attrs={'class':'btn'})
        if tag:
            new_soup=self.index_to_soup(tag['href'], raw=True)
            return new_soup

    def append_page(self, soup, appendtag):
        """Append the following pages of a multi-page article to ``appendtag``.

        The pager is the ``<div id="Str">`` element. Every pager link whose
        text contains u'następne' is fetched relative to ``INDEX`` and its
        ``id="artykul"`` element is appended. The pager and the ``id="source"``
        element are removed from ``appendtag``.
        """
        loop=False
        nexturl = []
        tag = soup.find('div', attrs={'id':'Str'})
        pager = appendtag.find('div', attrs={'id':'Str'})
        if pager:
            # ``tag`` comes from ``soup`` while the check is on ``appendtag``;
            # guard in case only the latter has a pager.
            if tag is not None:
                nexturl=tag.findAll('a')
            pager.extract()
            loop=True
        source = appendtag.find(id='source')
        if source:
            source.extract()
        while loop:
            loop=False
            for link in nexturl:
                # link.string is None for links with nested markup.
                if link.string and u'następne' in link.string:
                    url= self.INDEX + link['href']
                    soup2 = self.index_to_soup(url)
                    pagetext = soup2.find(id='artykul')
                    if pagetext is not None:
                        pos = len(appendtag.contents)
                        appendtag.insert(pos, pagetext)
                    tag = soup2.find('div', attrs={'id':'Str'})
                    # No pager on the fetched page means it was the last one.
                    nexturl=tag.findAll('a') if tag is not None else []
                    loop=True

    def gallery_article(self, appendtag):
        """Append the remaining pages of a photo gallery to ``appendtag``.

        Follows the link inside ``id="gal_btn_next"`` page by page, appending
        each page's ``id="container_gal"`` element and removing the
        ``id="gal_navi"`` navigation.
        """
        tag=appendtag.find(id='container_gal')
        if not tag:
            return
        nexturl = self._gallery_next_url(appendtag)
        navi = appendtag.find(id='gal_navi')
        if navi:
            navi.extract()
        while nexturl:
            soup2=self.index_to_soup(nexturl)
            pagetext=soup2.find(id='container_gal')
            if pagetext is None:
                # Not a gallery page: nothing to append or follow.
                break
            nexturl = self._gallery_next_url(pagetext)
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)
            rem=appendtag.find(id='gal_navi')
            if rem:
                rem.extract()

    def _gallery_next_url(self, container):
        """Return the href of the 'next' gallery link in ``container``, or None."""
        button = container.find(id='gal_btn_next')
        if button is None or button.a is None:
            return None
        return button.a.get('href')

    def preprocess_html(self, soup):
        """Merge multi-page articles and galleries into one document."""
        self.append_page(soup, soup.body)
        if soup.find(id='container_gal'):
            self.gallery_article(soup.body)
        return soup
+1 -1
View File
@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Gazeta_Wyborcza(BasicNewsRecipe):
title = u'Gazeta Wyborcza'
title = u'Gazeta.pl'
__author__ = 'fenuks, Artur Stachecki'
language = 'pl'
description = 'news from gazeta.pl'
+30
View File
@@ -0,0 +1,30 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Eddie Lau'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipeHouseNews(BasicNewsRecipe):
    """Recipe for House News (thehousenews.com), read from its RSS feed."""

    # --- recipe metadata ---
    title = u'House News \u4e3b\u5834\u65b0\u805e'
    __author__ = 'Eddie Lau'
    publisher = 'House News'
    description = 'http://thehousenews.com'
    category = 'Chinese, Blogs, Opinion, News, Hong Kong'
    language = 'zh'
    encoding = 'utf-8'
    masthead_url = 'http://thehousenews.com/static/images/housebeta.jpg'

    # --- download options ---
    oldest_article = 1
    max_articles_per_feed = 100
    auto_cleanup = False

    # --- sources ---
    feeds = [(u'Latest', u'http://thehousenews.com/rss/')]

    # --- page clean-up and styling ---
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}'
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class':['photo']}),
        dict(name='p', attrs={'class':'caption'}),
        dict(name='div', attrs={'class':'articleTextWrap'}),
        dict(name='div', attrs={'class':['author']}),
        dict(name='p', attrs={'class':'date'}),
    ]

    def populate_article_metadata(self, article, soup, first):
        """Register the first image of the first page as the article's TOC thumbnail.

        Does nothing for later pages, or when ``add_toc_thumbnail`` is not
        available on this recipe object.
        """
        if not first:
            return
        if not hasattr(self, 'add_toc_thumbnail'):
            return
        first_image = soup.find('img')
        if first_image is None:
            return
        self.add_toc_thumbnail(article, first_image['src'])
Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 878 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 668 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 471 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 415 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 190 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 699 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 982 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 290 B

After

Width:  |  Height:  |  Size: 786 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 536 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 490 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 802 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 802 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 802 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 221 B

After

Width:  |  Height:  |  Size: 802 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 588 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 485 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 698 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 609 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

+36
View File
@@ -0,0 +1,36 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class ihnedRecipe(BasicNewsRecipe):
    """Recipe for the Czech news site iHNed.cz, built from its section feeds."""

    # --- recipe metadata ---
    title = u'iHNed.cz'
    __author__ = 'bubak'
    publisher = u''
    description = 'ihned.cz'
    language = 'cs'
    cover_url = 'http://rss.ihned.cz/img/0/0_hp09/ihned.cz.gif'
    #encoding = 'iso-8859-2'

    # --- download options ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Zprávy', u'http://zpravy.ihned.cz/?m=rss'),
        (u'Hospodářské noviny', u'http://hn.ihned.cz/?p=500000_rss'),
        (u'Byznys', u'http://byznys.ihned.cz/?m=rss'),
        (u'Life', u'http://life.ihned.cz/?m=rss'),
        (u'Dialog', u'http://dialog.ihned.cz/?m=rss'),
    ]

    # --- page clean-up ---
    extra_css = """
"""
    remove_attributes = []
    remove_tags_before = dict(name='div', attrs={'id':['heading']})
    remove_tags_after = dict(name='div', attrs={'id':['next-authors']})
    remove_tags = [
        dict(name='ul', attrs={'id':['comm']}),
        dict(name='div', attrs={'id':['r-big']}),
        dict(name='div', attrs={'class':['tools tools-top']}),
    ]
+59
View File
@@ -0,0 +1,59 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
import re
from calibre.web.feeds.news import BasicNewsRecipe
class insider(BasicNewsRecipe):
    """Recipe for the subscription site denikinsider.cz.

    Logs in with the user's credentials and scrapes the article list from
    the front page instead of using a feed.
    """

    __author__ = 'bubak'
    title = 'Insider'
    language = 'cs'

    remove_tags = [dict(name='div', attrs={'class':'article-related-content'})
                  ,dict(name='div', attrs={'class':'calendar'})
                  ,dict(name='span', attrs={'id':'labelHolder'})
                  ]

    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'class':['doubleBlock textContentFormat']})]

    # Cut everything from the "T.mata:" marker to the end of the document.
    preprocess_regexps = [(re.compile(r'T.mata:.*', re.DOTALL|re.IGNORECASE), lambda m: '</body>')]

    needs_subscription = True

    def get_browser(self):
        """Return a browser that has been logged in to denikinsider.cz.

        Raises ValueError when the page returned after submitting the login
        form does not contain the logout link text.
        """
        # NOTE(review): called without an instance, exactly as before.
        # Confirm this matches the get_browser signature of the calibre
        # version this recipe targets.
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.denikinsider.cz/')
        br.select_form(nr=0)
        br['login-name'] = self.username
        br['login-password'] = self.password
        res = br.submit()
        raw = res.read()
        if isinstance(raw, bytes):
            # The marker below is a unicode string with non-ASCII characters,
            # so decode the response before searching it.
            # NOTE(review): assumes the site serves UTF-8 -- confirm.
            raw = raw.decode('utf-8', 'replace')
        if u'Odhlásit se' not in raw:
            # The two literals used to be concatenated without a separator.
            raise ValueError('Failed to login to insider.cz. '
                             'Check your username and password.')
        return br

    def parse_index(self):
        """Collect the front-page articles into a single feed.

        Each ``<span class="homepageArticleTitle">`` is a title whose parent
        element carries the link. Repeated titles are listed once.

        Returns ``[(recipe title, [article dict, ...])]``.
        Raises ValueError when the front page has no title spans.
        """
        soup = self.index_to_soup('http://www.denikinsider.cz')
        titles = soup.findAll('span', attrs={'class':'homepageArticleTitle'})
        # findAll returns a (possibly empty) list, never None.
        if not titles:
            raise ValueError('Could not find category content')

        articles = []
        seen_titles = set([])
        for title in titles:
            if title.string in seen_titles:
                continue
            article = title.parent
            seen_titles.add(title.string)
            url = article.get('href')
            if not url:
                # Title not wrapped in a link: nothing to download.
                continue
            if url.startswith('/'):
                # Base has no trailing slash, so the result has a single '/'.
                url = 'http://www.denikinsider.cz'+url
            self.log('\tFound article:', title, 'at', url)
            articles.append({'title':title.string, 'url':url, 'description':'',
                             'date':''})
        return [(self.title, articles)]
+52
View File
@@ -0,0 +1,52 @@
from calibre.web.feeds.news import BasicNewsRecipe
class KrytykaPolitycznaRecipe(BasicNewsRecipe):
    """Recipe for Krytyka Polityczna; downloads the print version of each article."""

    # --- recipe metadata ---
    __license__ = 'GPL v3'
    __author__ = u'intromatyk <intromatyk@gmail.com>'
    language = 'pl'
    version = 1
    title = u'Krytyka Polityczna'
    category = u'News'
    description = u' Lewicowe pismo zaangażowane w bieg spraw publicznych w Polsce.'
    cover_url=''

    # --- download options ---
    remove_empty_feeds= True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0
    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 3

    # --- page clean-up ---
    keep_only_tags = [
        dict(name = 'h1', attrs = {'class' : 'print-title'}),
        dict(name = 'div', attrs = {'class' : 'print-content'}),
    ]
    remove_tags = [
        dict(attrs = {'class' : ['field field-type-text field-field-story-switch', 'field field-type-filefield field-field-story-temp' , 'field field-type-text field-field-story-author', 'field field-type-text field-field-story-lead-switch']}),
    ]

    extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
td.contentheading{font-size: large; font-weight: bold;}
'''

    # --- sources ---
    feeds = [
        ('Wszystkie', 'http://www.krytykapolityczna.pl/rss.xml')
    ]

    def print_version(self, url):
        """Return the print-page URL for ``url``.

        Loads the article and reads the href of the element with class
        ``print-page``, then prefixes it with the site root.
        """
        page = self.index_to_soup(url)
        print_link = page.find(attrs = {'class' : 'print-page'})
        relative = print_link['href']
        self.log('PRINT', relative)
        return 'http://www.krytykapolityczna.pl/' + relative

    def preprocess_html(self, soup):
        """Replace each link that has a single text child with that text."""
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                continue
            anchor.replaceWith(label)
        return soup
+32
View File
@@ -0,0 +1,32 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class kudyznudyRecipe(BasicNewsRecipe):
    """Recipe for kudyznudy.cz (newest entries for the Prague region)."""

    # --- recipe metadata ---
    title = u'Kudy z nudy'
    __author__ = 'bubak'
    publisher = u''
    description = 'kudyznudy.cz'
    language = 'cs'
    cover_url = 'http://www.kudyznudy.cz/App_Themes/KzN/Images/Containers/Header/HeaderLogoKZN.png'
    #encoding = 'iso-8859-2'

    # --- download options ---
    oldest_article = 3
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Praha nejnovější', u'http://www.kudyznudy.cz/RSS/Charts.aspx?Type=Newest&Lang=cs-CZ&RegionId=1'),
    ]

    # --- page clean-up ---
    extra_css = """
"""
    remove_attributes = []
    keep_only_tags = []
    remove_tags_before = dict(name='div', attrs={'class':['C_WholeContentPadding']})
    remove_tags_after = dict(name='div', attrs={'class':['SurroundingsContainer']})
    remove_tags = [
        dict(name='div', attrs={'class':['Details', 'buttons', 'SurroundingsContainer', 'breadcrumb']}),
    ]
+40
View File
@@ -0,0 +1,40 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class lnRecipe(BasicNewsRecipe):
    """Recipe for the Czech daily lidovky.cz, built from its section feeds."""

    # --- recipe metadata ---
    title = u'lidovky'
    __author__ = 'bubak'
    publisher = u''
    description = 'lidovky.cz'
    language = 'cs'
    cover_url = 'http://g.lidovky.cz/o/lidovky_ln3b/lidovky-logo.png'
    #encoding = 'iso-8859-2'

    # --- download options ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Události', u'http://www.lidovky.cz/export/rss.asp?r=ln_domov'),
        (u'Svět', u'http://www.lidovky.cz/export/rss.asp?r=ln_zahranici'),
        (u'Byznys', u'http://www.lidovky.cz/export/rss.asp?c=ln_byznys'),
        (u'Věda', u'http://www.lidovky.cz/export/rss.asp?r=ln_veda'),
        (u'Názory', u'http://www.lidovky.cz/export/rss.asp?r=ln_nazory'),
        (u'Relax', u'http://www.lidovky.cz/export/rss.asp?c=ln_relax'),
    ]

    # --- page clean-up ---
    remove_attributes = []
    keep_only_tags = []
    remove_tags_before = dict(name='div', attrs={'id':['content']})
    remove_tags_after = dict(name='div', attrs={'class':['authors']})
    # Cut the raw HTML from the fb-root div to the end of the document.
    preprocess_regexps = [
        (re.compile(r'<div id="(fb-root)".*', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
    ]
+29
View File
@@ -0,0 +1,29 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class metropolRecipe(BasicNewsRecipe):
    """Recipe for Metropol TV (metropol.cz), read from its RSS feed."""

    # --- recipe metadata ---
    title = u'Metropol TV'
    __author__ = 'bubak'
    publisher = u''
    description = 'metropol.cz'
    language = 'cs'
    cover_url = 'http://www.metropol.cz/public/css/../images/logo/metropoltv.png'
    #encoding = 'iso-8859-2'

    # --- download options ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Metropolcv.cz', u'http://www.metropol.cz/rss/'),
    ]

    # --- page clean-up ---
    extra_css = """
"""
    remove_attributes = []
    keep_only_tags = [
        dict(name='div', attrs={'id':['art-full']}),
    ]
+49
View File
@@ -0,0 +1,49 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MyAppleRecipe(BasicNewsRecipe):
    """Recipe for MyApple.pl, a Polish Apple news site, read from its RSS feed."""

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

    title = u'MyApple.pl'
    category = u'News'
    description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
    cover_url=''
    remove_empty_feeds= True
    oldest_article = 7
    max_articles_per_feed = 100000
    recursions = 0

    # no_stylesheets used to be assigned twice with the same value.
    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 3

    keep_only_tags = [
        dict(name = 'div', attrs = {'id' : 'article_content'}),
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'article_author_date_comment_container'}),
        dict(name = 'div', attrs = {'class' : 'fullwidth'}),
        dict(name = 'div', attrs = {'class' : 'cmslinks'}),
        dict(name = 'div', attrs = {'class' : 'googleads-468'}),
        dict(name = 'div', attrs = {'id' : 'comments'}),
    ]

    extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
td.contentheading{font-size: large; font-weight: bold;}
'''

    feeds = [
        # 'feed://' is a feed-reader pseudo-scheme that HTTP clients cannot
        # open; the same resource is fetched over plain http.
        ('News', 'http://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
    ]

    def preprocess_html(self, soup):
        """Replace each link that has a single text child with that text."""
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup
+30
View File
@@ -0,0 +1,30 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class nfpkRecipe(BasicNewsRecipe):
    """Recipe for Nadacni fond proti korupci (nfpk.cz), read from FeedBurner."""

    # --- recipe metadata ---
    title = u'Nadační fond proti korupci'
    __author__ = 'bubak'
    publisher = u''
    description = 'nfpk.cz'
    language = 'cs'
    cover_url = 'http://www.nfpk.cz/_templates/nfpk/_images/logo.gif'
    #encoding = 'iso-8859-2'

    # --- download options ---
    oldest_article = 7
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Aktuality', u'http://feeds.feedburner.com/nfpk'),
    ]

    # --- page clean-up ---
    extra_css = """
"""
    remove_attributes = []
    keep_only_tags = [
        dict(name='div', attrs={'id':'content'}),
    ]
+56
View File
@@ -0,0 +1,56 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
'''
Fetch Népszabadság
'''
from calibre.web.feeds.news import BasicNewsRecipe
class nepszabadsag(BasicNewsRecipe):
    """Recipe for the Hungarian daily Nepszabadsag (nol.hu)."""

    # --- recipe metadata ---
    title = u'N\u00e9pszabads\u00e1g'
    __author__ = 'bubak'
    description = ''
    language = 'hu'
    timefmt = ' [%d %b %Y]'
    cover_url = 'http://nol.hu/_design/image/logo_nol_live.jpg'
    #encoding = 'utf-8'

    # --- download options ---
    use_embedded_content = False
    oldest_article = 2
    max_articles_per_feed = 20
    simultaneous_downloads = 5
    no_stylesheets = True
    remove_javascript = True
    #delay = 1
    #timeout = 10

    # --- sources ---
    feeds = [
        (u'Belföld', u'http://nol.hu/feed/belfold.rss'),
        # Currently disabled:
        #(u'Külföld', u'http://nol.hu/feed/kulfold.rss'),
        #(u'Gazdaság', u'http://nol.hu/feed/gazdasag.rss'),
        #(u'Kultúra', u'http://nol.hu/feed/kult.rss'),
    ]

    # --- page clean-up ---
    extra_css = '''
'''
    remove_attributes = []
    keep_only_tags = [
        dict(name='table', attrs={'class':'article-box'}),
    ]
    remove_tags_before = dict(name='div', attrs={'class':['d-source']})
    remove_tags_after = dict(name='div', attrs={'class':['tags']})
    remove_tags = [
        dict(name='div', attrs={'class':['h']}),
        dict(name='tfoot'),
    ]

    # NS sends an ad page sometimes but not frequently enough, TBD
    def AAskip_ad_pages(self, soup):
        """Return the decoded target page when ``soup`` is an advertisement page.

        A page counts as an advertisement when its <title> contains
        'advertisement' (case-insensitive); the first link on it is followed.
        Returns None for any other page.
        """
        page_title = soup.find('title').string.lower()
        if 'advertisement' not in page_title:
            return None
        href = soup.find('a').get('href')
        self.log.debug('Skipping to: ' + href)
        new = self.browser.open(href).read().decode('utf-8', 'ignore')
        #ipython(locals())
        self.log.debug('Finished: ' + href)
        return new
+32
View File
@@ -0,0 +1,32 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class pesRecipe(BasicNewsRecipe):
    """Recipe for Neviditelny pes (neviditelnypes.lidovky.cz)."""

    # --- recipe metadata ---
    title = u'Neviditelný pes'
    __author__ = 'bubak'
    publisher = u''
    description = u'Neviditelný pes'
    language = 'cs'
    cover_url = 'http://g.zpravy.cz/o/pes/logo_pes.jpg'
    #encoding = 'iso-8859-2'

    # --- download options ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Neviditelný pes', u'http://neviditelnypes.lidovky.cz/export/rss.asp?c=pes_neviditelny'),
    ]

    # --- page clean-up ---
    extra_css = """
"""
    remove_tags = []
    remove_tags_before = dict(name='div', attrs={'id':'art-full'})
    remove_tags_after = dict(name='div', attrs={'id':'authors'})
+6 -6
View File
@@ -22,9 +22,9 @@ class NewYorker(BasicNewsRecipe):
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
extra_css = """
body {font-family: "Times New Roman",Times,serif}
.articleauthor{color: #9F9F9F;
.articleauthor{color: #9F9F9F;
font-family: Arial, sans-serif;
font-size: small;
font-size: small;
text-transform: uppercase}
.rubric,.dd,h6#credit{color: #CD0021;
font-family: Arial, sans-serif;
@@ -63,11 +63,11 @@ class NewYorker(BasicNewsRecipe):
return url.strip()
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup('http://www.newyorker.com/magazine/toc/')
cover_item = soup.find('img',attrs={'id':'inThisIssuePhoto'})
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')
cover_item = soup.find('div',attrs={'id':'media-count-1'})
if cover_item:
cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
return cover_url
def preprocess_html(self, soup):
+1 -1
View File
@@ -13,7 +13,7 @@ import datetime
class Newsweek(BasicNewsRecipe):
# how many issues to go back, 0 means get the most current one
BACK_ISSUES = 1
BACK_ISSUES = 2
EDITION = '0'
DATE = None
+50
View File
@@ -0,0 +1,50 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class novinkyRecipe(BasicNewsRecipe):
    """Recipe for the Czech news site novinky.cz, with duplicate-article filtering."""

    # --- recipe metadata ---
    title = u'novinky.cz'
    __author__ = 'bubak'
    publisher = u'seznam.cz'
    description = 'novinky.cz'
    language = 'cs'
    cover_url = 'http://www.novinky.cz/static/images/logo.gif'
    #encoding = 'utf-8'

    # --- download options ---
    oldest_article = 1
    max_articles_per_feed = 20
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Domácí', u'http://www.novinky.cz/rss2/domaci/'),
        (u'Praha', u'http://www.novinky.cz/rss2/vase-zpravy/praha/'),
        (u'Ekonomika', u'http://www.novinky.cz/rss2/ekonomika/'),
        (u'Finance', u'http://www.novinky.cz/rss2/finance/'),
    ]

    # --- page clean-up ---
    keep_only_tags = []
    remove_tags_before = dict(name='div',attrs={'class':['articleHeader']})
    remove_tags_after = dict(name='div',attrs={'class':'related'})
    remove_tags = [
        dict(name='div', attrs={'id':['pictureInnerBox']}),
        dict(name='div', attrs={'id':['discussionEntry']}),
        dict(name='span', attrs={'id':['mynews-hits', 'mynews-author']}),
        dict(name='div', attrs={'class':['related']}),
        dict(name='div', attrs={'id':['multimediaInfo']}),
    ]

    # This source has identical articles under different links
    # which are redirected to the common url. I've found
    # just this API method that has the real URL
    visited_urls = {}

    def encoding(self, source):
        """Decode ``source`` as UTF-8, or return None if its final URL was already seen.

        The URL is read from ``source.newurl`` and recorded in the class-level
        ``visited_urls`` dict.
        """
        url = source.newurl
        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None
        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return source.decode('utf-8', 'replace')
+38
View File
@@ -0,0 +1,38 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class plRecipe(BasicNewsRecipe):
    """Recipe for Parlamentni Listy (parlamentnilisty.cz), read from its RSS export."""

    # --- recipe metadata ---
    title = u'Parlamentn\u00ed Listy'
    __author__ = 'bubak'
    publisher = u''
    description = ''
    language = 'cs'
    cover_url = 'http://www.parlamentnilisty.cz/design/listy-logo2.png'
    #encoding = 'iso-8859-2'

    # --- download options ---
    oldest_article = 1
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # --- sources ---
    feeds = [
        (u'Parlamentní listy.cz', u'http://www.parlamentnilisty.cz/export/rss.aspx'),
    ]

    # --- page clean-up ---
    remove_attributes = []
    keep_only_tags = [
        dict(name='div', attrs={'class':['article-detail']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['articledetailboxin','crumbs', 'relatedarticles articledetailbox']}),
        dict(name='div', attrs={'class':['socialshare-1 noprint', 'socialshare-2 noprint']}),
        dict(name='div', attrs={'id':'widget'}),
        dict(name='div', attrs={'class':'article-discussion-box noprint'}),
    ]
    # Cut the raw HTML from the "Ptejte se politik..." span/strong to the end.
    preprocess_regexps = [
        (re.compile(r'<(span|strong)[^>]*>\s*Ptejte se politik.*', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
    ]
+40
View File
@@ -0,0 +1,40 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class cpsRecipe(BasicNewsRecipe):
    # Recipe for the Piratská strana web site (pirati.cz), built from its
    # RSS feed.

    # --- identification ---
    title = u'Piratská strana'
    __author__ = 'bubak'
    publisher = u''
    description = ''
    language = 'cs'
    cover_url = 'http://www.pirati.cz/sites/all/themes/addari-cps/images/headbg.jpg'

    # --- what to fetch ---
    oldest_article = 3
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True
    feeds = [
        (u'Články', u'http://www.pirati.cz/rss.xml'),
    ]
    #encoding = 'iso-8859-2'

    # --- page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    extra_css = '\n'
    remove_attributes = []
    keep_only_tags = [
        dict(name='div', attrs={'id': 'postarea'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['breadcrumb',
                                          'submitted',
                                          'links-readmore']}),
        dict(name='div', attrs={'id': ['comments']}),
    ]
    remove_tags_before = dict(name='font', attrs={'size': '+3'})
    remove_tags_after = [
        dict(name='iframe'),
    ]

    # --- conversion ---
    conversion_options = {
        'linearize_tables': True,
    }
+34
View File
@@ -0,0 +1,34 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class nfpkRecipe(BasicNewsRecipe):
    # Recipe for Piratské noviny (piratskenoviny.cz), built from its RSS feed.

    # --- identification ---
    title = u'Piratské noviny'
    __author__ = 'bubak'
    publisher = u''
    description = 'nfpk.cz'
    language = 'cs'
    cover_url = 'http://www.piratskenoviny.cz/imgs/piratske-noviny.gif'

    # --- what to fetch ---
    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    remove_empty_feeds = True
    feeds = [
        (u'Aktuality', u'http://www.piratskenoviny.cz/run/rss.php'),
    ]
    #encoding = 'iso-8859-2'

    # --- page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    extra_css = '\n'
    remove_attributes = []
    remove_tags_before = dict(name='font', attrs={'size': '+3'})
    remove_tags_after = [
        dict(name='iframe'),
    ]

    # --- conversion ---
    conversion_options = {
        'linearize_tables': True,
    }
+1 -1
View File
@@ -4,7 +4,7 @@ class AdvancedUserRecipe1348063712(BasicNewsRecipe):
title = u'Portfolio.hu - English Edition'
__author__ = 'laca'
oldest_article = 7
language = 'en_HUN'
language = 'en_HU'
masthead_url = 'http://www.portfolio.hu/img/sit/angolfejlec2010.jpg'
use_embedded_content = False
auto_cleanup = True
+64
View File
@@ -0,0 +1,64 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class pravo(BasicNewsRecipe):
    # Recipe for Právo.  The index pages listed in `feeds` are scraped
    # directly (see parse_index / parse_page) instead of being read as RSS.
    __author__ = 'bubak'
    title = 'Právo'
    language = 'cs'

    remove_tags_before = dict(name='div', attrs={'class':'rubrika-ostat'})
    remove_tags_after = dict(name='td', attrs={'class':'rubrika'})
    remove_tags = [dict(name='td', attrs={'width':'273'})
        ,dict(name='td', attrs={'class':'rubrika'})
        ,dict(name='div', attrs={'class':'rubrika-ostat'})
        ]
    extra_css = '.nadpis {font-weight: bold; font-size: 130%;} .medium {text-align: justify;}'
    cover_url = 'http://pravo.novinky.cz/images/horni_6_logo.gif'
    cover_margins = (0, 100, '#ffffff')
    conversion_options = {'linearize_tables' : True}
    no_stylesheets = True

    # our variables
    # Titles already emitted, so an article listed on several index pages is
    # only included once.
    seen_titles = set([])
    # only yesterday's articles are online
    parent_url = 'http://pravo.novinky.cz/minule/'
    # (section title, index page URL) pairs consumed by parse_index().
    feeds = [
        ('Hlavní stránka', 'http://pravo.novinky.cz/minule/index.php'),
        ('Zpravodajství', 'http://pravo.novinky.cz/minule/zpravodajstvi.php'),
        ('Komentáře', 'http://pravo.novinky.cz/minule/komentare.php'),
        ('Praha a střední Čechy', 'http://pravo.novinky.cz/minule/praha_stredni_cechy.php')
    ]

    def parse_index(self):
        # Build one (section title, article list) entry per index page.
        return [self.parse_page(feed) for feed in self.feeds]

    def parse_page(self, feed):
        # Scrape a single index page.
        #
        # `feed` is a (feed_title, url) pair.  Returns (feed_title, articles)
        # where each article is a dict with 'title', 'url', 'description'
        # and 'date' keys.
        #
        # The pair is unpacked here rather than in the signature: tuple
        # parameters are Python 2 only syntax.
        feed_title, url = feed
        soup = self.index_to_soup(url)
        titles = soup.findAll('a', attrs={'class':'nadpis'})
        if not titles:
            # findAll() returns an empty list, never None, so report the
            # empty page here; the section is still returned (empty), as
            # it was before.
            self.log('Could not find any articles on page ' + url)
        articles = []
        for article in titles:
            title = article.string
            if title is None:
                # .string is None when the link contains nested markup;
                # fall back to the link's full text instead of crashing.
                title = self.tag_to_string(article)
            if title in self.seen_titles:
                continue
            self.seen_titles.add(title)
            link = article['href']
            if not link.startswith('http'):
                # Relative link: resolve against the "yesterday" section.
                link = self.parent_url + link
            self.log('\tFound article:', title, 'at', link)
            articles.append({'title':title, 'url':link, 'description':'',
                'date':''})
        return (feed_title, articles)
+40
View File
@@ -0,0 +1,40 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
http://prawica.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class prawica_recipe(BasicNewsRecipe):
    # Recipe for prawica.net; articles are fetched from the site's /print/
    # pages (see print_version).

    # --- identification ---
    title = u'prawica.net'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description ='Wiadomości ze strony prawica.net'
    INDEX='http://prawica.net/'

    # --- what to fetch ---
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True
    feeds = [
        (u'all', u'http://prawica.net/all/feed'),
    ]

    # --- page clean-up ---
    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        # The h1 entry is meant to keep the article title; the original
        # author noted that it does not work.
        dict(name = 'h1', attrs = {'class' : 'print-title'}),
        dict(name = 'div', attrs = {'class' : 'content'}),
    ]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'field field-type-viewfield field-field-autor2'}),
        dict(name = 'div', attrs = {'class' : 'field field-type-viewfield field-field-publikacje-autora'}),
        # NOTE(review): this value looks like a list of CSS classes but is
        # matched against the `id` attribute - confirm against the site markup.
        dict(name = 'div', attrs = {'id' : 'rate-widget-2 rate-widget clear-block rate-average rate-widget-fivestar rate-daa7512627f21dcf15e0af47e5279f0e rate-processed'}),
    ]

    remove_tags_after = [
        dict(name = 'div', attrs = {'class' : 'field-label-inline-first'}),
    ]

    def print_version(self, url):
        # Rewrite an article URL so that it points at the /print/ page.
        site_root = 'http://prawica.net/'
        print_root = 'http://prawica.net/print/'
        return url.replace(site_root, print_root)
+32
View File
@@ -0,0 +1,32 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RedVoltaireRecipe(BasicNewsRecipe):
    # Recipe for the Spanish edition of Red Voltaire (voltairenet.org).

    # --- identification ---
    title = u'Red Voltaire'
    __author__ = 'atordo'
    description = u'Red de prensa no alineada, especializada en el an\u00e1lisis de las relaciones internacionales'
    language = 'es'
    cover_url = u'http://www.voltairenet.org/squelettes/elements/images/logo-voltairenet-org.png'
    masthead_url = u'http://www.voltairenet.org/squelettes/elements/images/logo-voltairenet-org.png'

    # --- what to fetch ---
    oldest_article = 7
    max_articles_per_feed = 30
    use_embedded_content = False
    feeds = [
        u'http://www.voltairenet.org/spip.php?page=backend&id_secteur=1110&lang=es',
    ]

    # --- page clean-up ---
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True

    # The page is reshaped with three textual substitutions, in this order:
    #   1. everything from <title> up to the "updated" span collapses into an
    #      <h1> that carries the page title;
    #   2. the <time> element and what follows it, up to the "COLONNE TEXTE"
    #      marker comment, becomes a <small> holding the date text;
    #   3. everything from <aside> onward is cut and the document is closed.
    preprocess_regexps = [
        (
            re.compile(r'<title>(?P<titulo>.+)</title>.+<span class="updated" title=".+"><time',
                       re.IGNORECASE|re.DOTALL),
            lambda m: '</title></head><body><h1>' + m.group('titulo') + '</h1><time',
        ),
        (
            re.compile(r'<time datetime=.+pubdate>. (?P<fecha>.+)</time>.+<!------------------- COLONNE TEXTE ------------------->',
                       re.IGNORECASE|re.DOTALL),
            lambda m: '<small>' + m.group('fecha') + '</small>',
        ),
        (
            re.compile(r'<aside>.+', re.IGNORECASE|re.DOTALL),
            lambda m: '</body></html>',
        ),
    ]

    extra_css = '''
img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto}
'''
+37
View File
@@ -0,0 +1,37 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
import re
class respektRecipe(BasicNewsRecipe):
    # Recipe for the Czech weekly Respekt (articles and blogs).

    # --- identification ---
    title = u'Respekt'
    __author__ = 'bubak'
    publisher = u'Respekt'
    description = 'Respekt'
    language = 'cs'
    cover_url = 'http://respekt.ihned.cz/img/R/respekt_logo.png'

    # --- what to fetch ---
    oldest_article = 1
    max_articles_per_feed = 20
    encoding = 'cp1250'
    feeds = [
        (u'Všechny články', u'http://respekt.ihned.cz/index.php?p=R00000_rss'),
        (u'Blogy', u'http://blog.respekt.ihned.cz/?p=Rb00VR_rss'),
        #(u'Respekt DJ', u'http://respekt.ihned.cz/index.php?p=R00RDJ_rss'),
    ]

    # --- page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    keep_only_tags = []
    remove_tags_before = dict(name='div', attrs={'id': ['detail']})
    remove_tags_after = dict(name='div', attrs={'class': 'd-tools'})
    remove_tags = [
        dict(name='div', attrs={'class': ['d-tools', 'actions']}),
    ]

    # Textual substitutions, applied in this order:
    #   1. the "paid-zone" div and everything after it is replaced by a
    #      notice that the rest of the article must be paid for;
    #   2. everything up to and including the "mm-ow" div opener becomes
    #      an opening <body>;
    #   3. the "col3" div and everything after it becomes a closing </body>.
    preprocess_regexps = [
        (
            re.compile(r'<div class="paid-zone".*', re.DOTALL|re.IGNORECASE),
            lambda match: 'Za zbytek článku je nutno platit. </body>',
        ),
        (
            re.compile(r'.*<div class="mm-ow">', re.DOTALL|re.IGNORECASE),
            lambda match: '<body>',
        ),
        (
            re.compile(r'<div class="col3">.*', re.DOTALL|re.IGNORECASE),
            lambda match: '</body>',
        ),
    ]
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2012, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rybinski.eu
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Rybinski(BasicNewsRecipe):
    # Recipe for the rybinski.eu blog, built from its Polish RSS feed.

    # --- identification ---
    title = u'Rybinski.eu - economy of the XXI century'
    __author__ = u'Tomasz D\u0142ugosz'
    description = u'Blog ekonomiczny dra hab. Krzysztofa Rybi\u0144skiego'
    language = 'pl'

    # --- what to fetch ---
    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [
        (u'wpisy', u'http://www.rybinski.eu/?feed=rss2&lang=pl'),
    ]

    # --- page clean-up ---
    no_stylesheets = True
    # Keep the post container only, then strip its meta and comment boxes.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'post'}),
    ]
    remove_tags = [
        dict(name = 'div', attrs = {'class' : css_class})
        for css_class in ('post-meta-1', 'post-meta-2', 'post-comments')
    ]
+26
View File
@@ -0,0 +1,26 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
samcik.blox.pl
'''
from calibre.web.feeds.news import BasicNewsRecipe
class samcik(BasicNewsRecipe):
    # Recipe for Maciej Samcik's blog at samcik.blox.pl, built from its
    # RSS feed.

    # --- identification ---
    title = u'Maciej Samcik Blog'
    __author__ = 'teepel <teepel44@gmail.com>'
    language = 'pl'
    description =u'Blog Macieja Samcika, długoletniego dziennikarza ekonomicznego Gazety Wyborczej . O finansach małych i dużych. Mnóstwo ciekawostek na temat pieniędzy.'

    # --- what to fetch ---
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 3
    feeds = [
        (u'Wpisy', u'http://samcik.blox.pl/rss2'),
    ]

    # --- page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    # Drop every <table border="0"> from the downloaded pages.
    remove_tags = [
        dict(name = 'table', attrs = {'border' : '0'}),
    ]
+13 -12
View File
@@ -17,6 +17,7 @@ class Sciencenews(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
timefmt = ' [%A, %d %B, %Y]'
extra_css = '''
@@ -31,14 +32,14 @@ class Sciencenews(BasicNewsRecipe):
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
remove_tags = [
dict(name='ul', attrs={'id':'content_functions_bottom'})
,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
,dict(name='img', attrs={'class':'icon'})
,dict(name='div', attrs={'class': 'embiggen'})
]
#keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
#remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
#remove_tags = [
#dict(name='ul', attrs={'id':'content_functions_bottom'})
#,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
#,dict(name='img', attrs={'class':'icon'})
#,dict(name='div', attrs={'class': 'embiggen'})
#]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/index.php/feed/type/news/name/news.rss/view/feed/name/all.rss')]
@@ -53,9 +54,9 @@ class Sciencenews(BasicNewsRecipe):
return cover_url
def preprocess_html(self, soup):
#def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.name = 'div'
#for tag in soup.findAll(name=['span']):
#tag.name = 'div'
return soup
#return soup
+1 -1
View File
@@ -16,7 +16,7 @@ class AdvancedUserRecipe1303841067(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
language = 'de_DE'
language = 'de'
#conversion_options = {'base_font_size': 20}
+67
View File
@@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class telepolis(BasicNewsRecipe):
    # Recipe for Telepolis.pl.  Articles are downloaded from the site's
    # printer-friendly pages (see print_version) and cleaned up in
    # preprocess_html.
    title = u'Telepolis.pl'
    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    description = u'Twój telekomunikacyjny serwis informacyjny.\
Codzienne informacje, testy i artykuły,\
promocje, baza telefonów oraz centrum rozrywki'
    oldest_article = 7
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    remove_tags = []
    remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'}))

    # Textual substitutions that delete leftover site-specific markers
    # ("<: ... :>", "<-ankieta...>", "(Q!)", "(plik...)"), the "Zobacz:"
    # links and doubled <br> tags.
    preprocess_regexps = [(re.compile(r'<: .*? :>'),
                           lambda match: ''),
                          (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL),
                           lambda match: ''),
                          (re.compile(r'<-ankieta.*?>'),
                           lambda match: ''),
                          (re.compile(r'\(Q\!\)'),
                           lambda match: ''),
                          (re.compile(r'\(plik.*?\)'),
                           lambda match: ''),
                          (re.compile(r'<br.*?><br.*?>', re.DOTALL),
                           lambda match: '')
                          ]

    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''

    feeds = [
        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
    ]

    def print_version(self, url):
        # Map a feed link to its printer-friendly counterpart:
        # news.php -> news_print.php, otherwise artykuly.php -> art_print.php.
        if 'news.php' in url:
            print_url = url.replace('news.php', 'news_print.php')
        else:
            print_url = url.replace('artykuly.php', 'art_print.php')
        return print_url

    def preprocess_html(self, soup):
        # Clean up a downloaded print page and return the soup after
        # passing it through adeify_images().
        #
        # Image URLs containing 'm.jpg' are rewritten to '.jpg'.
        for image in soup.findAll('img'):
            # An <img> without a src attribute is left alone instead of
            # raising KeyError.
            src = image.get('src', None)
            if src and 'm.jpg' in src:
                image['src'] = src.replace('m.jpg', '.jpg')

        # Drop the first table row (the original code names it `logo`).
        # find() returns None when the page has no <tr>, so guard the call.
        logo = soup.find('tr')
        if logo is not None:
            logo.extract()

        # Drop every remaining row that carries the "printed from" notice
        # or the copyright line.
        for tag in soup.findAll('tr'):
            text = self.tag_to_string(tag)
            for strings in ['Wiadomość wydrukowana', 'copyright']:
                if strings in text:
                    tag.extract()
                    break  # already removed; no need to test other markers
        return self.adeify_images(soup)
+44
View File
@@ -0,0 +1,44 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class tydenRecipe(BasicNewsRecipe):
    # Recipe for Tyden.cz, built from three of the site's RSS feeds.  An
    # article that appears in more than one feed is only accepted once
    # (see get_article_url).

    # --- identification ---
    title = u'Tyden.cz'
    __author__ = 'bubak'
    publisher = u''
    description = ''
    language = 'cs'
    cover_url = 'http://www.tyden.cz/img/tyden-logo.png'

    # --- what to fetch ---
    oldest_article = 1
    max_articles_per_feed = 20
    feeds = [
        (u'Domácí', u'http://www.tyden.cz/rss/rss.php?rubrika_id=6'),
        (u'Politika', u'http://www.tyden.cz/rss/rss.php?rubrika_id=173'),
        (u'Kauzy', u'http://www.tyden.cz/rss/rss.php?rubrika_id=340'),
    ]
    #encoding = 'iso-8859-2'

    # --- page clean-up ---
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    remove_tags_before = dict(name='p', attrs={'id': ['breadcrumbs']})
    remove_tags_after = dict(name='p', attrs={'class': ['author']})

    # URLs accepted so far, used to filter out duplicates.
    visited_urls = {}

    def get_article_url(self, article):
        # Return the article URL the first time it is seen and None on
        # every later sighting.
        link = BasicNewsRecipe.get_article_url(self, article)
        if link not in self.visited_urls:
            self.visited_urls[link] = True
            self.log.debug('Accepting: ' + link)
            return link
        self.log.debug('Ignoring duplicate: ' + link)
        return None
+1 -1
View File
@@ -4,7 +4,7 @@ class AdvancedUserRecipe1347997197(BasicNewsRecipe):
title = u'XpatLoop.com'
__author__ = 'laca'
oldest_article = 7
language = 'en_HUN'
language = 'en_HU'
auto_cleanup = True
masthead_url = 'http://www.xpatloop.com/images/cms/xs_logo.gif'
use_embedded_content = False
+1 -1
View File
@@ -16,7 +16,7 @@ class ZeitDe(BasicNewsRecipe):
category = 'news, Germany'
timefmt = ' [%a, %d %b %Y]'
publication_type = 'newspaper'
language = 'de_DE'
language = 'de'
encoding = 'UTF-8'
oldest_article = 7
Binary file not shown.
+5 -3
View File
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, socket, struct, subprocess
import os, socket, struct, subprocess, sys, glob
from distutils.spawn import find_executable
from PyQt4 import pyqtconfig
@@ -16,6 +16,7 @@ from setup import isosx, iswindows, islinux
OSX_SDK = '/Developer/SDKs/MacOSX10.5.sdk'
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.5'
is64bit = sys.maxsize > 2**32
NMAKE = RC = msvc = MT = win_inc = win_lib = win_ddk = win_ddk_lib_dirs = None
if iswindows:
@@ -35,7 +36,7 @@ if iswindows:
MT = os.path.join(os.path.dirname(p), 'bin', 'mt.exe')
MT = os.path.join(SDK, 'bin', 'mt.exe')
os.environ['QMAKESPEC'] = 'win32-msvc'
ICU = r'Q:\icu'
ICU = os.environ.get('ICU_DIR', r'Q:\icu')
QMAKE = '/Volumes/sw/qt/bin/qmake' if isosx else 'qmake'
if find_executable('qmake-qt4'):
@@ -121,7 +122,8 @@ if iswindows:
zlib_lib_dirs = [sw_lib_dir]
zlib_libs = ['zlib']
magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.7.6')]
md = glob.glob(os.path.join(prefix, 'build', 'ImageMagick-*'))[-1]
magick_inc_dirs = [md]
magick_lib_dirs = [os.path.join(magick_inc_dirs[0], 'VisualMagick', 'lib')]
magick_libs = ['CORE_RL_wand_', 'CORE_RL_magick_']
podofo_inc = os.path.join(sw_inc_dir, 'podofo')
+3 -1
View File
@@ -18,7 +18,7 @@ from setup.build_environment import (chmlib_inc_dirs,
msvc, MT, win_inc, win_lib, win_ddk, magick_inc_dirs, magick_lib_dirs,
magick_libs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs,
icu_lib_dirs, win_ddk_lib_dirs, ft_libs, ft_lib_dirs, ft_inc_dirs,
zlib_libs, zlib_lib_dirs, zlib_inc_dirs)
zlib_libs, zlib_lib_dirs, zlib_inc_dirs, is64bit)
MT
isunix = islinux or isosx or isbsd
@@ -278,6 +278,8 @@ if iswindows:
ldflags = '/DLL /nologo /INCREMENTAL:NO /NODEFAULTLIB:libcmt.lib'.split()
#cflags = '/c /nologo /Ox /MD /W3 /EHsc /Zi'.split()
#ldflags = '/DLL /nologo /INCREMENTAL:NO /DEBUG'.split()
if is64bit:
cflags.append('/GS-')
for p in win_inc:
cflags.append('-I'+p)
+1 -1
View File
@@ -301,7 +301,7 @@ class LinuxFreeze(Command):
export MAGICK_CONFIGURE_PATH=$lib/{1}/config
export MAGICK_CODER_MODULE_PATH=$lib/{1}/modules-Q16/coders
export MAGICK_CODER_FILTER_PATH=$lib/{1}/modules-Q16/filters
$base/bin/{0} "$@"
exec $base/bin/{0} "$@"
''')
dest = self.j(self.obj_dir, bname+'.o')
+2 -8
View File
@@ -6,13 +6,11 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, shutil, subprocess, re
import os, shutil, subprocess
from setup import Command, __appname__, __version__
from setup.installer import VMInstaller
SIGNTOOL = r'C:\cygwin\home\kovid\sign.bat'
class Win(Command):
description = 'Build windows binary installers'
@@ -38,11 +36,7 @@ class Win32(VMInstaller):
def sign_msi(self):
print ('Signing installers ...')
raw = open(self.VM).read()
vmx = re.search(r'''launch_vmware\(['"](.+?)['"]''', raw).group(1)
subprocess.check_call(['vmrun', '-T', 'ws', '-gu', 'kovid', '-gp',
"et tu brutus", 'runProgramInGuest', vmx, 'cmd.exe', '/C',
r'C:\cygwin\home\kovid\sign.bat'])
subprocess.check_call(['ssh', self.VM_NAME, '~/sign.sh'], shell=False)
def download_installer(self):
installer = self.installer()
+18 -15
View File
@@ -10,18 +10,17 @@ import sys, os, shutil, glob, py_compile, subprocess, re, zipfile, time, textwra
from setup import (Command, modules, functions, basenames, __version__,
__appname__)
from setup.build_environment import msvc, MT, RC
from setup.build_environment import msvc, MT, RC, is64bit
from setup.installer.windows.wix import WixMixIn
ICU_DIR = r'Q:\icu'
OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.8.2'
ICU_DIR = os.environ.get('ICU_DIR', r'Q:\icu')
OPENSSL_DIR = os.environ.get('OPENSSL_DIR', r'Q:\openssl')
QT_DIR = os.environ.get('QT_DIR', 'Q:\\Qt\\4.8.2')
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
QTCURVE = r'C:\plugins\styles'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
LIBUNRAR = os.environ.get('UNRARDLL', 'C:\\Program Files\\UnrarDLL\\unrar.dll')
SW = r'C:\cygwin\home\kovid\sw'
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
'VisualMagick', 'bin')
IMAGEMAGICK = os.path.join(SW, 'build',
'ImageMagick-*\\VisualMagick\\bin')
CRT = r'C:\Microsoft.VC90.CRT'
LZMA = r'Q:\easylzma\build\easylzma-0.0.8'
@@ -89,8 +88,9 @@ class Win32Freeze(Command, WixMixIn):
self.archive_lib_dir()
self.remove_CRT_from_manifests()
self.create_installer()
self.build_portable()
self.build_portable_installer()
if not is64bit:
self.build_portable()
self.build_portable_installer()
def remove_CRT_from_manifests(self):
'''
@@ -262,8 +262,8 @@ class Win32Freeze(Command, WixMixIn):
print
print 'Adding third party dependencies'
print '\tAdding unrar'
shutil.copyfile(LIBUNRAR,
os.path.join(self.dll_dir, os.path.basename(LIBUNRAR)))
shutil.copyfile(LIBUNRAR, os.path.join(self.dll_dir,
os.path.basename(LIBUNRAR).replace('64', '')))
print '\tAdding misc binary deps'
bindir = os.path.join(SW, 'bin')
@@ -278,12 +278,15 @@ class Win32Freeze(Command, WixMixIn):
if not ok: continue
dest = self.dll_dir
shutil.copy2(f, dest)
for x in ('zlib1.dll', 'libxml2.dll'):
shutil.copy2(self.j(bindir, x+'.manifest'), self.dll_dir)
for x in ('zlib1.dll', 'libxml2.dll', 'libxslt.dll', 'libexslt.dll'):
msrc = self.j(bindir, x+'.manifest')
if os.path.exists(msrc):
shutil.copy2(msrc, self.dll_dir)
# Copy ImageMagick
impath = glob.glob(IMAGEMAGICK)[-1]
for pat in ('*.dll', '*.xml'):
for f in glob.glob(self.j(IMAGEMAGICK, pat)):
for f in glob.glob(self.j(impath, pat)):
ok = True
for ex in ('magick++', 'x11.dll', 'xext.dll'):
if ex in f.lower(): ok = False
+318 -116
View File
@@ -4,16 +4,98 @@ Notes on setting up the windows development environment
Overview
----------
calibre and all its dependencies are compiled using Visual Studio 2008 express edition (free from MS). All the following instructions must be run in a visual studio command prompt unless otherwise noted.
calibre and all its dependencies are compiled using Visual Studio 2008. All the
following instructions must be run in a visual studio command prompt (the
various commands use unix notation, so if you want to use them directly, you
have to setup cygwin).
calibre contains build script to automate the building of the calibre installer. These scripts make certain assumptions about where dependencies are installed. Your best best is to setup a VM and replicate the paths mentioned below exactly.
calibre contains build scripts to automate the building of the calibre
installer. These scripts make certain assumptions about where dependencies are
installed. Your best bet is to set up a VM and replicate the paths mentioned
below exactly.
Microsoft Visual Studio and Windows SDK
----------------------------------------
You have to use Visual Studio 2008 as that is the version Python 2.x works
with.
You need Visual Studio 2008 Express Edition for 32-bit and Professional for 64
bit.
1) Install Visual Studio
2) Install Visual Studio SP1 from http://www.microsoft.com/en-us/download/details.aspx?id=10986
(First check if the version of VS 2008 you have is not already SP1)
3) Install The Windows SDK. You need to install a version that is built for VS
2008. Get it from here: http://www.microsoft.com/en-us/download/details.aspx?id=3138
4) If you are building 64bit, edit the properties of the Visual Studio command
prompt shortcut to pass "amd64" instead of "x86" to the vsvars.bat file so that
it uses the 64 bit tools.
I've read that it is possible to use the 64-bit compiler that comes with the
Windows SDK With VS 2008 Express Edition, but I can't be bothered figuring it
out. Just use the Professional Edition.
Cygwin
------------
This is needed for automation of the build process, and the ease of use of the
unix shell (bash).
Install vim, rsync, openssh, unzip, wget and make at a minimum.
After installing python run::
python setup/vcvars.py && echo 'source ~/.vcvars' >> ~/.bash_profile
To allow you to use the visual studio tools in the cygwin shell.
The following is only needed for automation (setting up ssh access to the
windows machine).
In order to build debug builds (.pdb files and sign files), you have to be able
to login as the normal user account with ssh. To do this, follow these steps:
* Setup a password for your user account
* Follow the steps here:
http://pcsupport.about.com/od/windows7/ht/auto-logon-windows-7.htm or
http://pcsupport.about.com/od/windowsxp/ht/auto-logon-xp.htm to allow the
machine to bootup without having to enter the password
* First clean out any existing cygwin ssh setup with::
net stop sshd
cygrunsrv -R sshd
net user sshd /DELETE
net user cyg_server /DELETE (delete any other cygwin users account you
can list them with net user)
rm -R /etc/ssh*
mkpasswd -cl > /etc/passwd
mkgroup --local > /etc/group
* Assign the necessary rights to the normal user account::
editrights.exe -a SeAssignPrimaryTokenPrivilege -u kovid
editrights.exe -a SeCreateTokenPrivilege -u kovid
editrights.exe -a SeTcbPrivilege -u kovid
editrights.exe -a SeServiceLogonRight -u kovid
* Run::
ssh-host-config
And answer (yes) to all questions. If it asks do you want to use a
different user name, specify the name of your user account and enter
username and password (it asks on Win 7 not on Win XP)
* On Windows XP, I also had to run::
passwd -R
to allow sshd to use my normal user account even with public key
authentication. See http://cygwin.com/cygwin-ug-net/ntsec.html for
details. On Windows 7 this wasn't necessary for some reason.
* Start sshd with::
net start sshd
* See http://www.kgx.net.nz/2010/03/cygwin-sshd-and-windows-7/ for details
Pass port 22 through Windows firewall. Create ~/.ssh/authorized_keys
Basic dependencies
--------------------
Install cygwin and setup sshd (optional). Used to enable automation of the calibre build VM from linux, not needed if you are building manually.
Install cmake, python, WiX (WiX is used to generate the .msi installer)
Install MS Visual Studio 2008, cmake, python and WiX.
You have to
Set CMAKE_PREFIX_PATH environment variable to C:\cygwin\home\kovid\sw
@@ -21,10 +103,16 @@ This is where all dependencies will be installed.
Add C:\Python27\Scripts and C:\Python27 to PATH
Edit mimetypes.py in C:\Python27\Lib and set _winreg = None to prevent reading of mimetypes from the windows registry
Edit mimetypes.py in C:\Python27\Lib and set _winreg = None to prevent reading
of mimetypes from the windows registry
Install setuptools from http://pypi.python.org/pypi/setuptools
If there are no windows binaries already compiled for the version of python you are using then download the source and run the following command in the folder where the source has been unpacked::
Python packages
------------------
Install setuptools from http://pypi.python.org/pypi/setuptools If there are no
windows binaries already compiled for the version of python you are using then
download the source and run the following command in the folder where the
source has been unpacked::
python setup.py install
@@ -32,10 +120,9 @@ Run the following command to install python dependencies::
easy_install --always-unzip -U mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto cssselect
Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)
Install pywin32 and edit win32com\__init__.py setting _frozen = True and
__gen_path__ to a temp dir (otherwise it tries to set it to a dir in the install tree which leads to permission errors)
__gen_path__ to a temp dir (otherwise it tries to set it to a dir in the
install tree which leads to permission errors)
Note that you should use::
import tempfile
@@ -43,42 +130,58 @@ Note that you should use::
tempfile.gettempdir(), "gen_py",
"%d.%d" % (sys.version_info[0], sys.version_info[1]))
Use gettempdir instead of the win32 api method as gettempdir returns a temp dir that is guaranteed to actually work.
Use gettempdir instead of the win32 api method as gettempdir returns a temp dir
that is guaranteed to actually work.
Also edit win32com\client\gencache.py and change the except IOError on line 57 to catch all exceptions.
Also edit win32com\client\gencache.py and change the except IOError on line 57
to catch all exceptions.
SQLite
---------
Put sqlite3*.h from the sqlite windows amlgamation in ~/sw/include
Put sqlite3*.h from the sqlite windows amalgamation in ~/sw/include
APSW
-----
Download source from http://code.google.com/p/apsw/downloads/list and run in visual studio prompt
python setup.py fetch --all build --missing-checksum-ok --enable-all-extensions install test
python setup.py fetch --all --missing-checksum-ok build --enable-all-extensions install test
OpenSSL
--------
First install ActiveState Perl if you don't already have perl in windows
Download and untar the openssl tarball, follow the instructions in INSTALL.W32 (use no-asm)
Then, get nasm.exe from
http://www.nasm.us/pub/nasm/releasebuilds/2.05/nasm-2.05-win32.zip and put it
somewhere on your PATH (I chose ~/sw/bin)
Download and untar the openssl tarball, follow the instructions in INSTALL.(W32|W64)
to install use prefix q:\openssl
perl Configure VC-WIN32 no-asm enable-static-engine --prefix=Q:/openssl
ms\do_ms.bat
nmake -f ms\ntdll.mak
nmake -f ms\ntdll.mak test
nmake -f ms\ntdll.mak install
For 32-bit::
perl Configure VC-WIN32 no-asm enable-static-engine --prefix=Q:/openssl
ms\do_ms.bat
nmake -f ms\ntdll.mak
nmake -f ms\ntdll.mak test
nmake -f ms\ntdll.mak install
For 64-bit::
perl Configure VC-WIN64A no-asm enable-static-engine --prefix=C:/cygwin/home/kovid/sw/private/openssl
ms\do_win64a
nmake -f ms\ntdll.mak
nmake -f ms\ntdll.mak test
nmake -f ms\ntdll.mak install
Qt
--------
Download Qt sourcecode (.zip) from: http://qt-project.org/downloads
Extract Qt sourcecode to C:\Qt\current
Extract Qt sourcecode to C:\Qt\4.x.x.
Qt uses its own routine to locate and load "system libraries" including the openssl libraries needed for "Get Books". This means that we have to apply the following patch to have Qt load the openssl libraries bundled with calibre:
Qt uses its own routine to locate and load "system libraries" including the
openssl libraries needed for "Get Books". This means that we have to apply the
following patch to have Qt load the openssl libraries bundled with calibre:
--- src/corelib/plugin/qsystemlibrary.cpp 2011-02-22 05:04:00.000000000 -0700
@@ -97,7 +200,7 @@ Now, run configure and make::
-no-plugin-manifests is needed so that loading the plugins does not fail looking for the CRT assembly
configure -ltcg -opensource -release -qt-zlib -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -no-plugin-manifests -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
./configure.exe -ltcg -opensource -release -qt-zlib -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -nomake tools -no-plugin-manifests -openssl -I $OPENSSL_DIR/include -L $OPENSSL_DIR/lib && nmake
Add the path to the bin folder inside the Qt dir to your system PATH.
@@ -106,9 +209,7 @@ SIP
Available from: http://www.riverbankcomputing.co.uk/software/sip/download ::
python configure.py -p win32-msvc2008
nmake
nmake install
python configure.py -p win32-msvc2008 && nmake && nmake install
PyQt4
----------
@@ -119,15 +220,6 @@ Compiling instructions::
nmake
nmake install
Python Imaging Library
------------------------
Install as normal using installer at http://www.lfd.uci.edu/~gohlke/pythonlibs/
Test it on the target system with
calibre-debug -c "import _imaging, _imagingmath, _imagingft, _imagingcms"
ICU
-------
@@ -151,71 +243,63 @@ Optionally run make check
Libunrar
----------
http://www.rarlab.com/rar/UnRARDLL.exe install and add C:\Program Files\UnrarDLL to PATH
Get the source from http://www.rarlab.com/rar_add.htm
lxml
------
Open UnrarDll.vcproj, change build type to release.
If building 64 bit change Win32 to x64.
http://pypi.python.org/pypi/lxml
Build the Solution, find the dll in the build subdir. As best as I can tell,
the vcproj already defines the SILENT preprocessor directive, but you should
test this.
jpeg-7
-------
.. http://www.rarlab.com/rar/UnRARDLL.exe install and add C:\Program Files\UnrarDLL to PATH
Copy::
jconfig.vc to jconfig.h, makejsln.vc9 to jpeg.sln,
makeasln.vc9 to apps.sln, makejvcp.vc9 to jpeg.vcproj,
makecvcp.vc9 to cjpeg.vcproj, makedvcp.vc9 to djpeg.vcproj,
maketvcp.vc9 to jpegtran.vcproj, makervcp.vc9 to rdjpgcom.vcproj, and
makewvcp.vc9 to wrjpgcom.vcproj. (Note that the renaming is critical!)
Load jpeg.sln in Visual Studio
Goto Project->Properties->General Properties and change Configuration Type to dll
Add
#define USE_WINDOWS_MESSAGEBOX
to jconfig.h (this will cause error messages to show up in a box)
Change the definitions of GLOBAL and EXTERN in jmorecfg.h to
#define GLOBAL(type) __declspec(dllexport) type
#define EXTERN(type) extern __declspec(dllexport) type
cp build/jpeg-7/Release/jpeg.dll bin/
cp build/jpeg-7/Release/jpeg.lib build/jpeg-7/Release/jpeg.exp
cp build/jpeg-7/jerror.h build/jpeg-7/jpeglib.h build/jpeg-7/jconfig.h build/jpeg-7/jmorecfg.h include/
TODO: 64-bit check that SILENT is defined and that the ctypes bindings actually
work
zlib
------
nmake -f win32/Makefile.msc
nmake -f win32/Makefile.msc test
Build with::
nmake -f win32/Makefile.msc
nmake -f win32/Makefile.msc test
cp zlib1.dll* ../../bin
cp zlib.lib zdll.* ../../lib
cp zconf.h zlib.h ../../include
cp zlib1.dll* ../../bin
cp zlib.lib zdll.* ../../lib
cp zconf.h zlib.h ../../include
jpeg-8
-------
Get the source code from: http://sourceforge.net/projects/libjpeg-turbo/files/
Run::
chmod +x cmakescripts/* && cd build
cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DWITH_JPEG8=1 ..
nmake
cp sharedlib/jpeg8.dll* ~/sw/bin/
cp sharedlib/jpeg.lib ~/sw/lib/
cp jconfig.h ../jerror.h ../jpeglib.h ../jmorecfg.h ~/sw/include
libpng
---------
cp scripts/CMakelists.txt .
mkdir build
Run cmake-gui.exe with source directory . and build directory build
You will have to point to sw/lib/zdll.lib and sw/include for zlib
Also disable PNG_NO_STDIO and PNG_NO_CONSOLE_IO
Download the libpng .zip source file from:
http://www.libpng.org/pub/png/libpng.html
Now open PNG.sln in VS2008
Set Build type to Release
cp build/libpng-1.2.40/build/Release/libpng12.dll bin/
cp build/libpng-1.2.40/build/Release/png12.* lib/
cp build/libpng-1.2.40/png.h build/libpng-1.2.40/pngconf.h include/
Run::
mkdir build && cd build
cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DZLIB_INCLUDE_DIR=C:/cygwin/home/kovid/sw/include -DZLIB_LIBRARY=C:/cygwin/home/kovid/sw/lib/zdll.lib ..
nmake
cp libpng*.dll ~/sw/bin/
cp libpng*.lib ~/sw/lib/
cp pnglibconf.h ../png.h ../pngconf.h ~/sw/include/
freetype
-----------
Get the .zip source from: http://download.savannah.gnu.org/releases/freetype/
Edit *all copies* of the file ftoption.h and add to generate a .lib
and a correct dll
@@ -225,42 +309,143 @@ and a correct dll
VS 2008 .sln file is present, open it
Change active build type to release mutithreaded
* If you are doing x64 build, click the Win32 dropdown, select
Configuration manager->Active solution platform -> New -> x64
Project->Properties->Configuration Properties
change configuration type to dll
* Change active build type to release multithreaded
cp build/freetype-2.3.9/objs/release_mt/freetype.dll bin/
* Project->Properties->Configuration Properties change configuration type
to dll and build solution
cp "`find . -name *.dll`" ~/sw/bin/
cp "`find . -name freetype.lib`" ~/sw/lib/
Now change configuration back to static for .lib and build solution
cp "`find . -name freetype*MT.lib`" ~/sw/lib/
Now change configuration back to static for .lib
cp build/freetype-2.3.9/objs/win32/vc2008/freetype239MT.lib lib/
cp -rf build/freetype-2.3.9/include/* include/
cp -rf include/* ~/sw/include/
TODO: Test if this bloody thing actually works on 64 bit (apparently freetype
assumes sizeof(long) == sizeof(ptr) which is not true in Win64. See for
example: http://forum.openscenegraph.org/viewtopic.php?t=2880
expat
--------
Has a VC 6 project file expat.dsw
Get from: http://sourceforge.net/projects/expat/files/expat/
Set active build to Relase and change build type to dll
Apparently expat requires stdint.h which VS 2008 does not have. So we get our
own.
cp build/expat-2.0.1/win32/bin/Release/*.lib lib/
cp build/expat-2.0.1/win32/bin/Release/*.exp lib/
cp build/expat-2.0.1/win32/bin/Release/*.dll bin/
cp build/expat-2.0.1/lib/expat.h build/expat-2.0.1/lib/expat_external.h include/
Run::
cd lib
wget http://msinttypes.googlecode.com/svn/trunk/stdint.h
mkdir build && cd build
cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release ..
nmake
cp expat.dll ~/sw/bin/ && cp expat.lib ~/sw/lib/
cp ../lib/expat.h ../lib/expat_external.h ~/sw/include
libiconv
----------
Run::
mkdir vs2008 && cd vs2008
Then follow these instructions:
http://www.codeproject.com/Articles/302012/How-to-Build-libiconv-with-Microsoft-Visual-Studio
Change the type to Release and config to x64 or Win32 and Build solution and
then::
cp "`find . -name *.dll`" ~/sw/bin/
cp "`find . -name *.dll.manifest`" ~/sw/bin/
cp "`find . -name *.lib`" ~/sw/lib/iconv.lib
cp "`find . -name iconv.h`" ~/sw/include/
Information for using a static version of libiconv is at the link above.
libxml2
-------------
cd win32
cscript configure.js include=C:\cygwin\home\kovid\sw\include lib=C:\cygwin\home\sw\lib prefix=C:\cygwin\home\kovid\sw zlib=yes iconv=no
nmake /f Makefile.msvc
nmake /f Makefile.msvc install
mv lib/libxml2.dll bin/
cp ./build/libxml2-2.7.5/win32/bin.msvc/*.manifest bin/
Get it from: ftp://xmlsoft.org/libxml2/
Run::
cd win32
cscript.exe configure.js include=C:/cygwin/home/kovid/sw/include lib=C:/cygwin/home/kovid/sw/lib prefix=C:/cygwin/home/kovid/sw zlib=yes iconv=yes
nmake /f Makefile.msvc
mkdir -p ~/sw/include/libxml2/libxml
cp include/libxml/*.h ~/sw/include/libxml2/libxml/
find . -type f \( -name "*.dll" -o -name "*.dll.manifest" \) -exec cp "{}" ~/sw/bin/ \;
find . -name libxml2.lib -exec cp "{}" ~/sw/lib/ \;
libxslt
---------
Get it from: ftp://xmlsoft.org/libxml2/
Run::
cd win32
cscript.exe configure.js include=C:/cygwin/home/kovid/sw/include include=C:/cygwin/home/kovid/sw/include/libxml2 lib=C:/cygwin/home/kovid/sw/lib prefix=C:/cygwin/home/kovid/sw zlib=yes iconv=yes
nmake /f Makefile.msvc
mkdir -p ~/sw/include/libxslt ~/sw/include/libexslt
cp libxslt/*.h ~/sw/include/libxslt/
cp libexslt/*.h ~/sw/include/libexslt/
find . -type f \( -name "*.dll" -o -name "*.dll.manifest" \) -exec cp "{}" ~/sw/bin/ \;
find . -name lib*xslt.lib -exec cp "{}" ~/sw/lib/ \;
lxml
------
Get the source from: http://pypi.python.org/pypi/lxml
Add the following to the top of setupoptions.py::
if option == 'cflags':
return ['-IC:/cygwin/home/kovid/sw/include/libxml2',
'-IC:/cygwin/home/kovid/sw/include']
else:
return ['-LC:/cygwin/home/kovid/sw/lib']
Then, edit src/lxml/includes/etree_defs.h and change the section starting with
#ifndef LIBXML2_NEW_BUFFER
to
#ifdef LIBXML2_NEW_BUFFER
# define xmlBufContent(buf) xmlBufferContent(buf)
# define xmlBufLength(buf) xmlBufferLength(buf)
#endif
Run::
python setup.py install
Python Imaging Library
------------------------
For 32-bit:
Install as normal using installer at http://www.lfd.uci.edu/~gohlke/pythonlibs/
For 64-bit:
Download from http://pypi.python.org/pypi/Pillow/
Edit setup.py setting the ROOT values, like this::
SW = r'C:\cygwin\home\kovid\sw'
JPEG_ROOT = ZLIB_ROOT = FREETYPE_ROOT = (SW+r'\lib', SW+r'\include')
Build and install with::
python setup.py build
python setup.py install
Note that the lcms module will not be built. PIL requires lcms-1.x but only
lcms-2.x can be compiled as a 64 bit library.
Test it on the target system with
calibre-debug -c "from PIL import Image; import _imaging, _imagingmath, _imagingft"
kdewin32-msvc
----------------
I don't think this is needed any more; I've left it here just in case I'm wrong.
Get it from http://www.winkde.org/pub/kde/ports/win32/repository/kdesupport/
mkdir build
Run cmake
@@ -279,29 +464,34 @@ cp build/kdewin32-msvc-0.3.9/include/*.h include/
poppler
-------------
In Cmake: disable GTK, Qt, OPenjpeg, cpp, lcms, gtk_tests, qt_tests. Enable qt4, jpeg, png and zlib
mkdir build
NOTE: poppler must be built as a static library, unless you build the qt4 bindings
Run the cmake GUI which will find the various dependencies automatically.
On 64 bit cmake might not let you choose Visual Studio 2008, in which case
leave the source field blank, click configure, choose Visual Studio 2008 and
then enter the source field.
cp build/utils/Release/*.exe ../../bin/
In Cmake: disable GTK, Qt, OPenjpeg, cpp, lcms, gtk_tests, qt_tests. Enable
jpeg, png and zlib::
cp build/utils/Release/*.exe ../../bin/
podofo
----------
Download from http://podofo.sourceforge.net/download.html
Add the following three lines near the top of CMakeLists.txt
SET(WANT_LIB64 FALSE)
SET(PODOFO_BUILD_SHARED TRUE)
SET(PODOFO_BUILD_STATIC FALSE)
cp build/podofo-*/build/src/Release/podofo.dll bin/
cp build/podofo-*/build/src/Release/podofo.lib lib/
cp build/podofo-*/build/src/Release/podofo.exp lib/
cp build/podofo-*/build/podofo_config.h include/podofo/
cp -r build/podofo-*/src/* include/podofo/
You have to use >=0.9.1
Run::
cp "`find . -name *.dll`" ~/sw/bin/
cp "`find . -name *.lib`" ~/sw/lib/
mkdir ~/sw/include/podofo
cp build/podofo_config.h ~/sw/include/podofo
cp -r src/* ~/sw/include/podofo/
ImageMagick
@@ -324,7 +514,7 @@ Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE
Now open VisualMagick/VisualDynamicMT.sln set to Release
Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.
F7 for build project, you will get one error due to the removal of xlib, ignore
F7 for build solution, you will get one error due to the removal of xlib, ignore
it.
netifaces
@@ -334,10 +524,10 @@ Download the source tarball from http://alastairs-place.net/projects/netifaces/
Rename netifaces.c to netifaces.cpp and make the same change in setup.py
Run
Run::
python setup.py build
cp `find build/ -name *.pyd` /cygdrive/c/Python27/Lib/site-packages/
python setup.py build
cp build/lib.win32-2.7/netifaces.pyd /cygdrive/c/Python27/Lib/site-packages/
psutil
--------
@@ -352,11 +542,23 @@ cp -r build/lib.win32-*/* /cygdrive/c/Python27/Lib/site-packages/
easylzma
----------
This is only needed to build the portable installer.
Get it from http://lloyd.github.com/easylzma/ (use the trunk version)
Run cmake and build the Visual Studio solution (generates CLI tools and dll and
static lib automatically)
chmlib
-------
Download the zip source code from: http://www.jedrea.com/chmlib/
Run::
cd src && unzip ./ChmLib-ds6.zip
Then open ChmLib.dsw in Visual Studio, change the configuration to Release
(Win32|x64) and build solution, this will generate a static library in
Release/ChmLib.lib
calibre
---------
+2 -8
View File
@@ -217,19 +217,15 @@ wchar_t* get_app_dirw() {
void load_python_dll() {
char *app_dir, *fc_dir, *fc_file, *dll_dir, *qt_plugin_dir;
char *app_dir, *dll_dir, *qt_plugin_dir;
size_t l;
app_dir = get_app_dir();
l = strlen(app_dir)+20;
dll_dir = (char*) calloc(l, sizeof(char));
fc_dir = (char*) calloc(l, sizeof(char));
fc_file = (char*) calloc(l, sizeof(char));
qt_plugin_dir = (char*) calloc(l, sizeof(char));
if (!dll_dir || !qt_plugin_dir || !fc_dir) ExitProcess(_show_error(L"Out of memory", L"", 1));
if (!dll_dir || !qt_plugin_dir) ExitProcess(_show_error(L"Out of memory", L"", 1));
_snprintf_s(dll_dir, l, _TRUNCATE, "%sDLLs", app_dir);
_snprintf_s(fc_dir, l, _TRUNCATE, "%sfontconfig", app_dir);
_snprintf_s(fc_file, l, _TRUNCATE, "%s\\fonts.conf", fc_dir);
_snprintf_s(qt_plugin_dir, l, _TRUNCATE, "%sqt_plugins", app_dir);
free(app_dir);
@@ -237,8 +233,6 @@ void load_python_dll() {
_putenv_s("MAGICK_CONFIGURE_PATH", dll_dir);
_putenv_s("MAGICK_CODER_MODULE_PATH", dll_dir);
_putenv_s("MAGICK_FILTER_MODULE_PATH", dll_dir);
_putenv_s("FC_CONFIG_DIR", fc_dir);
_putenv_s("FC_CONFIG_FILE", fc_file);
_putenv_s("QT_PLUGIN_PATH", qt_plugin_dir);
if (!SetDllDirectoryA(dll_dir)) ExitProcess(show_last_error(L"Failed to set DLL directory."));
+463 -463
View File
File diff suppressed because it is too large Load Diff
+6 -6
View File
@@ -18,14 +18,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-09-04 18:42+0000\n"
"Last-Translator: SimonFS <simonschuette@arcor.de>\n"
"PO-Revision-Date: 2012-11-08 15:28+0000\n"
"Last-Translator: Elmux <bla.mail@gmx.net>\n"
"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-09-05 04:37+0000\n"
"X-Generator: Launchpad (build 15901)\n"
"X-Launchpad-Export-Date: 2012-11-09 04:39+0000\n"
"X-Generator: Launchpad (build 16250)\n"
"Language: de\n"
#. name for aaa
@@ -58,7 +58,7 @@ msgstr "Ambrak"
#. name for aah
msgid "Arapesh; Abu'"
msgstr ""
msgstr "Arapesh;Abu' (Papua-Neuguinea)"
#. name for aai
msgid "Arifama-Miniafia"
@@ -102,7 +102,7 @@ msgstr "Aasáx"
#. name for aat
msgid "Albanian; Arvanitika"
msgstr ""
msgstr "Albanisch, Arvanitikanisch"
#. name for aau
msgid "Abau"
+82
View File
@@ -0,0 +1,82 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys, subprocess
from distutils.msvc9compiler import find_vcvarsall, get_build_version
plat = 'amd64' if sys.maxsize > 2**32 else 'x86'
def remove_dups(variable):
    """Drop repeated entries from an ``os.pathsep``-separated string.

    The first occurrence of every entry is kept and the relative order of the
    surviving entries is unchanged.

    :param variable: Value of a PATH-like variable, entries joined by
                     ``os.pathsep``.
    :return: The de-duplicated entries joined by ``os.pathsep``.
    """
    seen = set()
    unique = []
    for entry in variable.split(os.pathsep):
        # A set gives constant-time membership checks, so the scan stays
        # linear; the list preserves first-seen order.
        if entry not in seen:
            seen.add(entry)
            unique.append(entry)
    return os.pathsep.join(unique)
def query_process(cmd):
    """Run ``cmd`` and parse the ``NAME=value`` lines it prints on stdout.

    Variable names are lower-cased. For the ``path`` variable a single
    trailing ``os.pathsep`` is removed and duplicate entries are dropped via
    ``remove_dups``.

    :param cmd: Command handed unchanged to ``subprocess.Popen``.
    :return: Dict mapping lower-cased variable names to their values.
    :raises RuntimeError: If the command exits with a non-zero status; the
                          message is the decoded stderr output.
    """
    env = {}
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    try:
        out, err = proc.communicate()
        if proc.wait() != 0:
            raise RuntimeError(err.decode("mbcs"))
        for entry in out.decode("mbcs").splitlines():
            # Only lines of the form NAME=value are of interest
            if '=' in entry:
                name, _, val = entry.strip().partition('=')
                name = name.lower()
                if name == 'path':
                    if val.endswith(os.pathsep):
                        val = val[:-1]
                    val = remove_dups(val)
                env[name] = val
    finally:
        proc.stdout.close()
        proc.stderr.close()
    return env
def query_vcvarsall():
    """Return the environment that vcvarsall.bat sets up for ``plat``.

    Locates vcvarsall.bat for the MSVC version reported by
    ``get_build_version()``, runs it followed by ``set`` and returns the
    parsed variables (see ``query_process``).

    :return: Dict mapping lower-cased variable names to their values.
    :raises RuntimeError: If vcvarsall.bat cannot be located, or if running
                          it fails.
    """
    version = get_build_version()
    vcvarsall = find_vcvarsall(version)
    if vcvarsall is None:
        # find_vcvarsall() signals "not found" by returning None; fail here
        # instead of trying to execute a program literally called "None".
        raise RuntimeError(
            'Unable to find vcvarsall.bat for MSVC version %s' % version)
    return query_process('"%s" %s & set' % (vcvarsall, plat))
# Capture the variables printed after running vcvarsall.bat; keys are
# lower-cased by query_process().
env = query_vcvarsall()
# PATH is split into individual entries so that unix() can convert each one.
paths = env['path'].split(';')
# LIB, INCLUDE and LIBPATH are kept as the raw strings reported by the
# environment; they are written out unchanged further down.
lib = env['lib']
include = env['include']
libpath = env['libpath']
def unix(paths):
    """Convert drive-letter paths into a ``:``-separated cygwin path list.

    ``os.sep`` is replaced by ``/`` and each entry of the form
    ``X:/rest/of/path`` becomes ``/cygdrive/x/rest/of/path``.

    Entries with nothing before the first separator (empty strings produced
    by a doubled ``;`` in PATH, or paths that start with a separator) carry no
    drive letter and are skipped rather than raising ``IndexError``.

    :param paths: Iterable of path strings.
    :return: The converted entries joined with ``:``.
    """
    up = []
    for p in paths:
        prefix, p = p.replace(os.sep, '/').partition('/')[0::2]
        if not prefix:
            # No drive component, so there is no /cygdrive/<letter> to map to
            continue
        up.append('/cygdrive/%s/%s'%(prefix[0].lower(), p))
    return ':'.join(up)
# Shell snippet exporting the captured settings. PATH entries are converted
# with unix() and placed in front of the existing $PATH; LIB, INCLUDE and
# LIBPATH are inserted exactly as reported by the environment.
# NOTE(review): presumably meant to be sourced from a cygwin shell -- confirm.
raw = '''\
#!/bin/sh
export PATH="%s:$PATH"
export LIB="%s"
export INCLUDE="%s"
export LIBPATH="%s"
'''%(unix(paths), lib, include, libpath)
# Written in binary mode as UTF-8 encoded bytes to ~/.vcvars
with open(os.path.expanduser('~/.vcvars'), 'wb') as f:
f.write(raw.encode('utf-8'))
+3 -9
View File
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 6)
numeric_version = (0, 9, 7)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
@@ -14,14 +14,6 @@ Various run time constants.
import sys, locale, codecs, os, importlib, collections
_tc = None
def terminal_controller():
global _tc
if _tc is None:
from calibre.utils.terminfo import TerminalController
_tc = TerminalController(sys.stdout)
return _tc
_plat = sys.platform.lower()
iswindows = 'win32' in _plat or 'win64' in _plat
isosx = 'darwin' in _plat
@@ -37,6 +29,8 @@ isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None
ispy3 = sys.version_info.major > 2
isxp = iswindows and sys.getwindowsversion().major < 6
isworker = os.environ.has_key('CALIBRE_WORKER') or os.environ.has_key('CALIBRE_SIMPLE_WORKER')
if isworker:
os.environ.pop('CALIBRE_FORCE_ANSI', None)
try:
preferred_encoding = locale.getpreferredencoding()
+14
View File
@@ -308,6 +308,10 @@ class FileTypePlugin(Plugin): # {{{
#: to the database
on_import = False
#: If True, this plugin is run after books are added
#: to the database
on_postimport = False
#: If True, this plugin is run just before a conversion
on_preprocess = False
@@ -337,6 +341,16 @@ class FileTypePlugin(Plugin): # {{{
# Default implementation does nothing
return path_to_ebook
def postimport(self, book_id, book_format, db):
    '''
    Hook invoked post import, i.e., once the book file has been added to
    the database. The default implementation does nothing.

    :param book_id: Database id of the added book.
    :param book_format: The file type of the book that was added.
    :param db: Library database.
    '''
    return None
# }}}
class MetadataReaderPlugin(Plugin): # {{{
+12 -12
View File
@@ -1433,15 +1433,6 @@ class StoreFoylesUKStore(StoreBase):
formats = ['EPUB', 'PDF']
affiliate = True
class StoreGandalfStore(StoreBase):
name = 'Gandalf'
author = u'Tomasz Długosz'
description = u'Księgarnia internetowa Gandalf.'
actual_plugin = 'calibre.gui2.store.stores.gandalf_plugin:GandalfStore'
headquarters = 'PL'
formats = ['EPUB', 'PDF']
class StoreGoogleBooksStore(StoreBase):
name = 'Google Books'
description = u'Google Books'
@@ -1472,7 +1463,7 @@ class StoreKoboStore(StoreBase):
class StoreLegimiStore(StoreBase):
name = 'Legimi'
author = u'Tomasz Długosz'
description = u'Tanie oraz darmowe ebooki, egazety i blogi w formacie EPUB, wprost na Twój e-czytnik, iPhone, iPad, Android i komputer'
description = u'Ebooki w formacie EPUB, MOBI i PDF'
actual_plugin = 'calibre.gui2.store.stores.legimi_plugin:LegimiStore'
headquarters = 'PL'
@@ -1566,6 +1557,15 @@ class StorePragmaticBookshelfStore(StoreBase):
headquarters = 'US'
formats = ['EPUB', 'MOBI', 'PDF']
class StorePublioStore(StoreBase):
    # Store plugin entry for Publio; the implementation is referenced by
    # the dotted path in actual_plugin.
    name = 'Publio'
    author = u'Tomasz Długosz'
    description = u'Publio.pl to księgarnia internetowa, w której mogą Państwo nabyć e-booki i audiobooki.'
    actual_plugin = 'calibre.gui2.store.stores.publio_plugin:PublioStore'

    headquarters = 'PL'
    formats = ['EPUB', 'MOBI', 'PDF']
class StoreRW2010Store(StoreBase):
name = 'RW2010'
description = u'Polski serwis self-publishingowy. Pliki PDF, EPUB i MOBI. Maksymalna cena utworu nie przekracza u nas 10 złotych!'
@@ -1675,7 +1675,6 @@ plugins += [
StoreEscapeMagazineStore,
StoreFeedbooksStore,
StoreFoylesUKStore,
StoreGandalfStore,
StoreGoogleBooksStore,
StoreGutenbergStore,
StoreKoboStore,
@@ -1689,6 +1688,7 @@ plugins += [
StoreOpenBooksStore,
StoreOzonRUStore,
StorePragmaticBookshelfStore,
StorePublioStore,
StoreRW2010Store,
StoreSmashwordsStore,
StoreVirtualoStore,
@@ -1716,7 +1716,7 @@ if __name__ == '__main__':
ret = 0
for x in ('lxml', 'calibre.ebooks.BeautifulSoup', 'uuid',
'calibre.utils.terminfo', 'calibre.utils.magick', 'PIL', 'Image',
'calibre.utils.terminal', 'calibre.utils.magick', 'PIL', 'Image',
'sqlite3', 'mechanize', 'httplib', 'xml'):
if x in sys.modules:
ret = 1
+23
View File
@@ -104,14 +104,17 @@ def is_disabled(plugin):
# File type plugins {{{
_on_import = {}
_on_postimport = {}
_on_preprocess = {}
_on_postprocess = {}
def reread_filetype_plugins():
global _on_import
global _on_postimport
global _on_preprocess
global _on_postprocess
_on_import = {}
_on_postimport = {}
_on_preprocess = {}
_on_postprocess = {}
@@ -122,6 +125,10 @@ def reread_filetype_plugins():
if not _on_import.has_key(ft):
_on_import[ft] = []
_on_import[ft].append(plugin)
if plugin.on_postimport:
if not _on_postimport.has_key(ft):
_on_postimport[ft] = []
_on_postimport[ft].append(plugin)
if plugin.on_preprocess:
if not _on_preprocess.has_key(ft):
_on_preprocess[ft] = []
@@ -163,6 +170,22 @@ run_plugins_on_preprocess = functools.partial(_run_filetype_plugins,
occasion='preprocess')
run_plugins_on_postprocess = functools.partial(_run_filetype_plugins,
occasion='postprocess')
def run_plugins_on_postimport(db, book_id, fmt):
    '''
    Call ``postimport`` on every enabled plugin registered in
    ``_on_postimport`` for the (lower-cased) format ``fmt``.

    A failure inside one plugin is reported on stdout together with its
    traceback and does not stop the remaining plugins from running.

    :param db: Library database, passed through to the plugins.
    :param book_id: Database id of the added book.
    :param fmt: File type of the added book; matched case-insensitively.
    '''
    customization = config['plugin_customization']
    fmt = fmt.lower()
    # Lazily filtered, so is_disabled() is consulted right before each
    # plugin would run
    enabled = (p for p in _on_postimport.get(fmt, []) if not is_disabled(p))
    for plugin in enabled:
        plugin.site_customization = customization.get(plugin.name, '')
        with plugin:
            try:
                plugin.postimport(book_id, fmt, db)
            except:
                print ('Running file type plugin %s failed with traceback:'
                        % plugin.name)
                traceback.print_exc()
# }}}
# Plugin customization {{{
+7 -31
View File
@@ -15,7 +15,13 @@ def option_parser():
parser = OptionParser(usage='''\
%prog [options]
Run an embedded python interpreter.
Various command line interfaces useful for debugging calibre. With no options,
this command starts an embedded python interpreter. You can also run the main
calibre GUI and the calibre viewer in debug mode.
It also contains interfaces to various bits of calibre that do not have
dedicated command line tools, such as font subsetting, tweaking ebooks and so
on.
''')
parser.add_option('-c', '--command', help='Run python code.', default=None)
parser.add_option('-e', '--exec-file', default=None, help='Run the python code in file.')
@@ -37,9 +43,6 @@ Run an embedded python interpreter.
help='Run the ebook viewer',)
parser.add_option('--paths', default=False, action='store_true',
help='Output the paths necessary to setup the calibre environment')
parser.add_option('--migrate', action='store_true', default=False,
help='Migrate old database. Needs two arguments. Path '
'to library1.db and path to new library folder.')
parser.add_option('--add-simple-plugin', default=None,
help='Add a simple plugin (i.e. a plugin that consists of only a '
'.py file), by specifying the path to the py file containing the '
@@ -118,28 +121,6 @@ def reinit_db(dbpath, callback=None, sql_dump=None):
os.remove(dest)
prints('Database successfully re-initialized')
def migrate(old, new):
from calibre.utils.config import prefs
from calibre.library.database import LibraryDatabase
from calibre.library.database2 import LibraryDatabase2
from calibre.utils.terminfo import ProgressBar
from calibre.constants import terminal_controller
class Dummy(ProgressBar):
def setLabelText(self, x): pass
def setAutoReset(self, y): pass
def reset(self): pass
def setRange(self, min, max):
self.min = min
self.max = max
def setValue(self, val):
self.update(float(val)/getattr(self, 'max', 1))
db = LibraryDatabase(old)
db2 = LibraryDatabase2(new)
db2.migrate_old(db, Dummy(terminal_controller(), 'Migrating database...'))
prefs['library_path'] = os.path.abspath(new)
print 'Database migrated to', os.path.abspath(new)
def debug_device_driver():
from calibre.devices import debug
debug(ioreg_to_tmp=True, buf=sys.stdout)
@@ -249,11 +230,6 @@ def main(args=sys.argv):
exec opts.command
elif opts.debug_device_driver:
debug_device_driver()
elif opts.migrate:
if len(args) < 3:
print 'You must specify the path to library1.db and the path to the new library folder'
return 1
migrate(args[1], args[2])
elif opts.add_simple_plugin is not None:
add_simple_plugin(opts.add_simple_plugin)
elif opts.paths:
+7 -13
View File
@@ -11,7 +11,6 @@ from optparse import OptionParser
from calibre import __version__, __appname__, human_readable
from calibre.devices.errors import PathError
from calibre.utils.terminfo import TerminalController
from calibre.devices.errors import ArgumentError, DeviceError, DeviceLocked
from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner
@@ -20,8 +19,7 @@ from calibre.utils.config import device_prefs
MINIMUM_COL_WIDTH = 12 #: Minimum width of columns in ls output
class FileFormatter(object):
def __init__(self, file, term):
self.term = term
def __init__(self, file):
self.is_dir = file.is_dir
self.is_readonly = file.is_readonly
self.size = file.size
@@ -94,7 +92,7 @@ def info(dev):
print "Software version:", info[2]
print "Mime type: ", info[3]
def ls(dev, path, term, recurse=False, color=False, human_readable_size=False, ll=False, cols=0):
def ls(dev, path, recurse=False, human_readable_size=False, ll=False, cols=0):
def col_split(l, cols): # split list l into columns
rows = len(l) / cols
if len(l) % cols:
@@ -126,14 +124,13 @@ def ls(dev, path, term, recurse=False, color=False, human_readable_size=False, l
for file in files:
size = len(str(file.size))
if human_readable_size:
file = FileFormatter(file, term)
file = FileFormatter(file)
size = len(file.human_readable_size)
if size > maxlen: maxlen = size
for file in files:
file = FileFormatter(file, term)
file = FileFormatter(file)
name = file.name if ll else file.isdir_name
lsoutput.append(name)
if color: name = file.name_in_color
lscoloutput.append(name)
if ll:
size = str(file.size)
@@ -173,10 +170,8 @@ def shutdown_plugins():
pass
def main():
term = TerminalController()
cols = term.COLS
if not cols: # On windows terminal width is unknown
cols = 80
from calibre.utils.terminal import geometry
cols = geometry()[0]
parser = OptionParser(usage="usage: %prog [options] command args\n\ncommand "+
"is one of: info, books, df, ls, cp, mkdir, touch, cat, rm, eject, test_file\n\n"+
@@ -260,7 +255,6 @@ def main():
dev.mkdir(args[0])
elif command == "ls":
parser = OptionParser(usage="usage: %prog ls [options] path\nList files on the device\n\npath must begin with / or card:/")
parser.add_option("--color", help="show ls output in color", dest="color", action="store_true", default=False)
parser.add_option("-l", help="In addition to the name of each file, print the file type, permissions, and timestamp (the modification time, in the local timezone). Times are local.", dest="ll", action="store_true", default=False)
parser.add_option("-R", help="Recursively list subdirectories encountered. /dev and /proc are omitted", dest="recurse", action="store_true", default=False)
parser.remove_option("-h")
@@ -269,7 +263,7 @@ def main():
if len(args) != 1:
parser.print_help()
return 1
print ls(dev, args[0], term, color=options.color, recurse=options.recurse, ll=options.ll, human_readable_size=options.hrs, cols=cols),
print ls(dev, args[0], recurse=options.recurse, ll=options.ll, human_readable_size=options.hrs, cols=cols),
elif command == "info":
info(dev)
elif command == "cp":
+3
View File
@@ -14,6 +14,9 @@ const calibre_device_entry_t calibre_mtp_device_table[] = {
// Amazon Kindle Fire HD
, { "Amazon", 0x1949, "Fire HD", 0x0007, DEVICE_FLAGS_ANDROID_BUGS}
// Nexus 10
, { "Google", 0x18d1, "Nexus 10", 0x4ee2, DEVICE_FLAGS_ANDROID_BUGS}
, { NULL, 0xffff, NULL, 0xffff, DEVICE_FLAG_NONE }
};
@@ -696,7 +696,7 @@ PyObject* wpd::put_file(IPortableDevice *device, const wchar_t *parent_id, const
PyBytes_AsStringAndSize(raw, &buf, &bytes_read);
if (bytes_read > 0) {
Py_BEGIN_ALLOW_THREADS;
hr = dest->Write(buf, bytes_read, &bytes_written);
hr = dest->Write(buf, (ULONG)bytes_read, &bytes_written);
Py_END_ALLOW_THREADS;
Py_DECREF(raw);
if (hr == STG_E_MEDIUMFULL) { PyErr_SetString(WPDError, "Cannot write to device as it is full"); break; }
+2 -2
View File
@@ -19,9 +19,9 @@ class TECLAST_K3(USBMS):
PRODUCT_ID = [0x3203]
BCD = [0x0000, 0x0100]
VENDOR_NAME = ['TECLAST', 'IMAGIN', 'RK28XX', 'PER3274B']
VENDOR_NAME = ['TECLAST', 'IMAGIN', 'RK28XX', 'PER3274B', 'BEBOOK']
WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['DIGITAL_PLAYER', 'TL-K5',
'EREADER', 'USB-MSC', 'PER3274B']
'EREADER', 'USB-MSC', 'PER3274B', 'BEBOOK']
MAIN_MEMORY_VOLUME_LABEL = 'K3 Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'K3 Storage Card'

Some files were not shown because too many files have changed in this diff Show More