mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit d82f376ccd: merge from trunk
@@ -16,7 +16,6 @@ resources/ebook-convert-complete.pickle
resources/builtin_recipes.xml
resources/builtin_recipes.zip
resources/template-functions.json
resources/display/*.js
setup/installer/windows/calibre/build.log
src/calibre/translations/.errors
src/cssutils/.svn/
Changelog.yaml (204 lines changed)
@@ -19,6 +19,210 @@
# new recipes:
# - title:

- version: 0.8.52
  date: 2012-05-18

  new features:
    - title: "EPUB Input: When setting the cover for a book that identifies its cover image, but not the html wrapper around the cover, try to detect and remove that wrapper automatically."
      tickets: [ 999959 ]

    - title: "When deleting books of a specific format, show the number of books with each format available"

    - title: "Linux install: No longer create MAN pages as all utilities have more comprehensive command line --help anyway"

    - title: "Add a tweak Preferences->Tweaks to control the default choice of format for the Tweak Book feature"

    - title: "Conversion: Allow setting negative page margins. A negative page margin means that calibre will not specify any page margin in the output document (for formats that support this)"

  bug fixes:
    - title: "Tweak book: Fix handling of covers when tweaking KF8 books"

    - title: "KF8 Output: Handle input documents with out of sequence ToC entries. Note that currently section jumping in the KF8 output produced by calibre for such files does not work."
      tickets: [1000493]

    - title: "Edit metadata dialog: Fix the edit values button for custom tag-like columns showing an unneeded warning about changed values"

    - title: "EPUB Output: Be a little more conservative when removing <form> tags. Only remove them if they have actual forms inside."
      tickets: [ 1000384 ]

    - title: "EPUB Input: Correctly update the Cover entry in the ToC even when the entry has a fragment reference."
      tickets: [ 999973 ]

    - title: "Update ImageMagick DLLs in all calibre binary builds to fix security vulnerabilities in ImageMagick"
      tickets: [ 999496 ]

    - title: "Advanced search dialog: Fix equals and regex matching not being applied for custom column searches."
      tickets: [ 980221 ]

    - title: "RTF Input: Handle old RTF files that have commands without braces."
      tickets: [ 994133 ]

    - title: "Get Books: Diesel, fix results not showing when only a single match is found"

    - title: "Get Books: Fix DRM status indicators for Kobo and Diesel stores. Fix smashwords not returning results."
      tickets: [ 993755 ]

    - title: "Fix regression in 0.8.51 that broke viewing of LIT and some EPUB files"
      tickets: [998248, 998216]

  improved recipes:
    - Clarin
    - Spiegel
    - Spiegel International
    - Montreal Gazette
    - Gosc Niedzielny
    - Ars Technica

  new recipes:
    - title: "Army/Navy/Air Force/Marine Times and News busters"
      author: jde

    - title: "Ads of the World, Heavy Metal (Italian) and Juve La Stampa"
      author: faber1971

    - title: "Revista Summa"
      author: Vakya

    - title: "Strategic Culture"
      author: Darko Miletic

    - title: Stars and Stripes
      author: adoucette

    - title: Nachdenkseiten
      author: jrda

- version: 0.8.51
  date: 2012-05-11

  new features:
    - title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
      tickets: [994514]

    - title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"

    - title: "Driver for Motorola XT875 and Pandigital SuperNova"
      tickets: [996890]

    - title: "Add a colour swatch to the dialog for creating column coloring rules, to ease selection of colors"
      tickets: [994811]

    - title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"

    - title: "List EPUB and MOBI at the top of the dropdown list of formats to convert to, as they are the most common choices"
      tickets: [994838]

  bug fixes:
    - title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
      tickets: [996102]

    - title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"

    - title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
      tickets: [997034]

    - title: "Fix download of news in AZW3 format not working"
      tickets: [996439]

    - title: "Pocketbook driver: Update for new PB 611 firmware."
      tickets: [903079]

    - title: "ebook-convert: Error out if the user provides extra command line args instead of silently ignoring them"
      tickets: [994939]

    - title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
      tickets: [994861]

    - title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"

  improved recipes:
    - Mainichi news
    - derStandard
    - Endgadget Japan

  new recipes:
    - title: Mainichi English
      author: Hiroshi Miura

    - title: The Grid TO
      author: Yusuf W

    - title: National Geographic (Italy)
      author: faber1971

    - title: Rebelion
      author: Marc Busque

- version: 0.8.50
  date: 2012-05-04

  new features:
    - title: "Tweak Book: Allow tweaking of KF8 MOBI files. Useful to fine-tune the result of a conversion. Right click on the book and select Tweak Book to use the feature. Note that tweaking a MOBI file that contains both KF8 and older MOBI6 will cause the MOBI6 version to be discarded."

    - title: "AZW3 output plugin. This output plugin generates pure KF8 mobi files. These only work on the Kindle Fire and Kindle Touch with latest firmware."

    - title: "Conversion: Allow easy re-ordering of the search and replace expressions in the conversion dialog. Also apply the expressions in the same order that they were entered when doing the conversion."

    - title: "Automatically add the Tag 'Sample Book' when an Amazon sample is added to calibre"

    - title: "FB2 Input: Better handling of inline images."
      tickets: [989869]

  bug fixes:
    - title: "KF8 Output: Fix section to section jumps not working for documents with multi-level ToCs"

    - title: "EPUB Input: Handle the case of the metadata ToC containing a reference to the cover HTML file."
      tickets: [993812]

    - title: "CHM Input: Handle files with deeply nested markup and non html files listed at the start of the manifest."
      tickets: [993607]

    - title: "KF8 Output: Workaround Kindle Touch bug that causes the book to be rendered as black pages when a height is specified for <body>"

    - title: "Fix regression in 0.8.49 that broke italics detection in heuristic processing on 32-bit systems."
      tickets: [991380]

    - title: "KF8 Output: Fix joint MOBI6/KF8 books not being recognized as MOBI files by older Kindles"

    - title: "KF8 Output: Fix errors when processing documents with HTML comments and/or XML processing instructions"

    - title: "Get Books: Amazon fix prices not being found. B&N fix details link. ebooks.com: fix cover image. Website changes to various EU stores"

    - title: "FB2 Input: More robust base64 decoding to handle embedded images that are incorrectly encoded."
      tickets: [990929]

    - title: "Fix scrolling with the cover browser updating only the selection in the book list, not the current book."
      tickets: [990881]

    - title: "Save to Disk: Do not run out of memory when saving very large files on systems with low RAM."
      tickets: [990741]

    - title: "FB2 Output: Use 2 letter language codes in preference to 3-letter ones to not break poorly implemented FB2 readers"
      tickets: [990026]

    - title: "EPUB Input: Auto set the media-type for OPF manifest entries with an empty media-type"

  improved recipes:
    - National Post
    - Daily Mirror
    - Sun
    - Newsweek Polska
    - Max-Planck
    - derStandard
    - tweakers.net

  new recipes:
    - title: George Monbiot
      author: Darko Miletic

    - title: El Mundo
      author: atordo

    - title: AraInfo and Diagonal
      author: Ruben Pollan

- version: 0.8.49
  date: 2012-04-27
recipes/ads_of_the_world.recipe (new file, 26 lines)
@@ -0,0 +1,26 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1336986047(BasicNewsRecipe):
    title = u'Ads of the World'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    description = 'The best international advertising campaigns'
    language = 'en'
    __author__ = 'faber1971'

    no_stylesheets = True
    keep_only_tags = [
        dict(name='div', attrs={'id':'primary'})
        ]

    remove_tags = [
        dict(name='ul', attrs={'class':'links inline'})
        ,dict(name='div', attrs={'class':'form-item'})
        ,dict(name='div', attrs={'id':['options', 'comments']})
        ,dict(name='ul', attrs={'id':'nodePager'})
        ]

    reverse_article_order = True
    masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
    feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
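A note on the matcher syntax used throughout these recipes: each dict in keep_only_tags / remove_tags is a BeautifulSoup-style matcher, so the recipe above behaves roughly like the following standalone sketch (illustration only, with hypothetical HTML; the actual pruning happens inside BasicNewsRecipe):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '<div id="primary"><ul class="links inline">share</ul><p>story</p></div>'
    soup = BeautifulSoup(html)
    body = soup.find('div', attrs={'id': 'primary'})           # keep_only_tags
    for tag in body.findAll('ul', attrs={'class': 'links inline'}):
        tag.extract()                                          # remove_tags
    print body   # -> <div id="primary"><p>story</p></div>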
recipes/air_force_times.recipe (new file, 43 lines)
@@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AirForceTimes(BasicNewsRecipe):
    title = 'Air Force Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Air Force'
    language = 'en'
    publisher = 'AirForceTimes.com'
    category = 'news, U.S. Air Force'
    tags = 'news, U.S. Air Force'
    cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.airforcetimes.com/rss_news.php'),
        ('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
        ('Money', 'http://www.airforcetimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
        ('Community', 'http://www.airforcetimes.com/rss_community.php'),
        ('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
        ]
recipes/army_times.recipe (new file, 42 lines)
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe

class ArmyTimes(BasicNewsRecipe):
    title = 'Army Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Army'
    language = 'en'
    publisher = 'ArmyTimes.com'
    category = 'news, U.S. Army'
    tags = 'news, U.S. Army'
    cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.armytimes.com/rss_news.php'),
        ('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
        ('Money', 'http://www.armytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
        ('Community', 'http://www.armytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
        ]
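The Air Force, Army, Marine Corps and Military Times recipes in this commit are identical apart from title, domain and logo. Purely as a hypothetical refactoring sketch (make_times_recipe and its arguments are invented here, not part of the commit), they could be generated from a single factory:

    from calibre.web.feeds.news import BasicNewsRecipe

    def make_times_recipe(name, domain, logo):
        # (section title, rss_<slug>.php) pairs shared by all four sites
        sections = [('News', 'news'), ('Benefits', 'benefits'),
                    ('Money', 'money'), ('Careers & Education', 'careers'),
                    ('Community', 'community'), ('Off Duty', 'off_duty'),
                    ('Entertainment', 'entertainment'), ('Guard & Reserve', 'guard')]

        class TimesRecipe(BasicNewsRecipe):
            title = name
            __author__ = 'jde'
            language = 'en'
            publisher = domain
            cover_url = masthead_url = logo
            oldest_article = 7  # days
            max_articles_per_feed = 25
            publication_type = 'newspaper'
            no_stylesheets = True
            use_embedded_content = False
            remove_javascript = True
            remove_empty_feeds = True
            auto_cleanup = True
            feeds = [(t, 'http://www.%s/rss_%s.php' % (domain, s))
                     for t, s in sections]
        return TimesRecipe

    ArmyTimes = make_times_recipe('Army Times', 'armytimes.com',
        'http://www.armytimes.com/images/logo_armytimes_alert.jpg')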
@@ -1,33 +1,34 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
arstechnica.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class ArsTechnica(BasicNewsRecipe):
    title = u'Ars Technica'
    language = 'en'
    __author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
    description = 'The art of technology'
    publisher = 'Ars Technica'
    description = 'Ars Technica: Serving the technologist for 1.2 decades'
    publisher = 'Conde Nast Publications'
    category = 'news, IT, technology'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    extra_css = '''
        body {font-family: Arial,Helvetica,sans-serif}
        .title{text-align: left}
        .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
        .news-item-figure-caption-text{font-size:small; font-style:italic}
        .news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
        '''
    ignoreEtcArticles = True  # Etc feed items can be ignored, as they're not real stories
    remove_empty_feeds = True
    publication_type = 'newsportal'
    extra_css = '''
        body {font-family: Arial,sans-serif}
        .heading{font-family: "Times New Roman",serif}
        .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
        img{display: block}
        .caption-text{font-size:small; font-style:italic}
        .caption-byline{font-size:small; font-style:italic; font-weight:bold}
        '''

    conversion_options = {
        'comments' : description
@@ -36,93 +37,64 @@ class ArsTechnica(BasicNewsRecipe):
        ,'publisher' : publisher
    }

    #preprocess_regexps = [
    #    (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
    #    ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
    #    ]

    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
    keep_only_tags = [
        dict(attrs={'class':'standalone'})
        ,dict(attrs={'id':'article-guts'})
        ]

    remove_tags = [
        dict(name=['object','link','embed'])
        ,dict(name='div', attrs={'class':'read-more-link'})
        dict(name=['object','link','embed','iframe','meta'])
        ,dict(attrs={'class':'corner-info'})
        ]
    #remove_attributes=['width','height']
    remove_attributes = ['lang']

    feeds = [
        (u'Infinite Loop (Apple content)'         , u'http://feeds.arstechnica.com/arstechnica/apple/')
        ,(u'Opposable Thumbs (Gaming content)'    , u'http://feeds.arstechnica.com/arstechnica/gaming/')
        ,(u'Gear and Gadgets'                     , u'http://feeds.arstechnica.com/arstechnica/gadgets/')
        ,(u'Chipster (Hardware content)'          , u'http://feeds.arstechnica.com/arstechnica/hardware/')
        ,(u'Uptime (IT content)'                  , u'http://feeds.arstechnica.com/arstechnica/business/')
        ,(u'Open Ended (Open Source content)'     , u'http://feeds.arstechnica.com/arstechnica/open-source/')
        ,(u'One Microsoft Way'                    , u'http://feeds.arstechnica.com/arstechnica/microsoft/')
        ,(u'Nobel Intent (Science content)'       , u'http://feeds.arstechnica.com/arstechnica/science/')
        ,(u'Scientific method (Science content)'  , u'http://feeds.arstechnica.com/arstechnica/science/')
        ,(u'Law & Disorder (Tech policy content)' , u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
        ]

    # This deals with multi-page stories
    def append_page(self, soup, appendtag, position):
        pager = soup.find('div',attrs={'class':'pager'})
        pager = soup.find(attrs={'class':'numbers'})
        if pager:
            for atag in pager.findAll('a',href=True):
                str = self.tag_to_string(atag)
                if str.startswith('Next'):
                    nurl = 'http://arstechnica.com' + atag['href']
                    rawc = self.index_to_soup(nurl,True)
                    soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)

                    readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
                    if readmoretag:
                        readmoretag.extract()
                    texttag = soup2.find('div', attrs={'class':'body'})
                    for it in texttag.findAll(style=True):
                        del it['style']

                    newpos = len(texttag.contents)
                    self.append_page(soup2,texttag,newpos)
                    texttag.extract()
                    pager.extract()
                    appendtag.insert(position,texttag)
            nexttag = pager.find(attrs={'class':'next'})
            if nexttag:
                nurl = nexttag.parent['href']
                rawc = self.index_to_soup(nurl,True)
                soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
                texttag = soup2.find(attrs={'id':'article-guts'})
                newpos = len(texttag.contents)
                self.append_page(soup2,texttag,newpos)
                texttag.extract()
                pager.extract()
                appendtag.insert(position,texttag)

    def preprocess_html(self, soup):
        # Adds line breaks near the byline (not sure why this is needed)
        ftag = soup.find('div', attrs={'class':'byline'})
        if ftag:
            brtag = Tag(soup,'br')
            brtag2 = Tag(soup,'br')
            ftag.insert(4,brtag)
            ftag.insert(5,brtag2)

        # Remove style items
        for item in soup.findAll(style=True):
            del item['style']

        # Remove id
        for item in soup.findAll(id=True):
            del item['id']

        # For some reason, links to authors don't have the domain name
        a_author = soup.find('a',{'href':re.compile("^/author")})
        if a_author:
            a_author['href'] = 'http://arstechnica.com'+a_author['href']

        # within div class news-item-figure, we need to grab images

        # Deal with multi-page stories
        self.append_page(soup, soup.body, 3)

        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

    def get_article_url(self, article):
        # If the article title starts with Etc:, don't return it
        if self.ignoreEtcArticles:
            article_title = article.get('title',None)
            if re.match('Etc: ',article_title) is not None:
                return None

        # The actual article is in a guid tag
        return article.get('guid', None).rpartition('?')[0]

    def preprocess_raw_html(self, raw, url):
        return '<html><head>'+raw[raw.find('</head>'):]
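The preprocess_raw_html override above is a blunt way to drop everything the page ships inside <head> (scripts, styles, meta) before parsing: slicing from raw.find('</head>') keeps the closing tag and all of the body. A quick illustration with a hypothetical input:

    raw = '<html><head><script>heavy()</script></head><body>story</body></html>'
    cleaned = '<html><head>' + raw[raw.find('</head>'):]
    # cleaned == '<html><head></head><body>story</body></html>'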
@@ -1,6 +1,6 @@

from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
clarin.com
'''
@@ -8,9 +8,9 @@ clarin.com
from calibre.web.feeds.news import BasicNewsRecipe

class Clarin(BasicNewsRecipe):
    title = 'Clarin'
    title = 'Clarín'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Argentina y mundo'
    description = 'Clarin.com. Noticias de la Argentina y el mundo. Información actualizada las 24 horas y en español. Informate ya'
    publisher = 'Grupo Clarin'
    category = 'news, politics, Argentina'
    oldest_article = 2
@@ -26,9 +26,7 @@ class Clarin(BasicNewsRecipe):
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        h2{font-family: Georgia,serif; font-size: xx-large}
        .hora{font-weight:bold}
        .hd p{font-size: small}
        .nombre-autor{color: #0F325A}
        .info,.nombre-autor,.hora{font-size: small}
        """

    conversion_options = {
@@ -38,38 +36,35 @@ class Clarin(BasicNewsRecipe):
        , 'language' : language
    }

    keep_only_tags = [dict(attrs={'class':['hd','mt']})]
    remove_tags = [dict(name=['meta','base','link'])]
    remove_attributes = ['lang','_mce_bogus']
    keep_only_tags = [dict(attrs={'class':['hd','mt','bd']})]
    remove_tags = [dict(name=['meta','base','link','iframe','embed','object'])]
    remove_attributes = ['lang']

    feeds = [
        (u'Pagina principal', u'http://www.clarin.com/rss/')
        ,(u'Politica'        , u'http://www.clarin.com/rss/politica/')
        ,(u'Deportes'        , u'http://www.clarin.com/rss/deportes/')
        ,(u'Economia'        , u'http://www.clarin.com/economia/')
        ,(u'Mundo'           , u'http://www.clarin.com/rss/mundo/')
        ,(u'iEco'            , u'http://www.ieco.clarin.com/rss/')
        ,(u'Espectaculos'    , u'http://www.clarin.com/rss/espectaculos/')
        ,(u'Sociedad'        , u'http://www.clarin.com/rss/sociedad/')
        ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/')
        ,(u'Policiales'      , u'http://www.clarin.com/rss/policiales/')
        ,(u'Internet'        , u'http://www.clarin.com/rss/internet/')
        ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/')
        ]

    def get_article_url(self, article):
        return article.get('guid', None)

    def print_version(self, url):
        return url + '?print=1'

    def get_article_url(self, article):
        return article.get('guid', None)

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('div',attrs={'class':'bb-md bb-md-edicion_papel'})
        if cover_item:
            ap = cover_item.find('a',attrs={'href':'/edicion-impresa/'})
            if ap:
                cover_url = self.INDEX + ap.img['src']
        for item in soup.findAll('a', href=True):
            if item['href'].startswith('/tapas/TAPA_CLA'):
                cover_url = self.INDEX + item['href']
                return cover_url
        return cover_url
@@ -1,5 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
from claibre import browser
from calibre import browser
import re

class AdvancedUserRecipe1306061239(BasicNewsRecipe):
@@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from time import strftime

class DerStandardRecipe(BasicNewsRecipe):
    title = u'derStandard'
    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira'
    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
    description = u'Nachrichten aus Österreich'
    publisher = 'derStandard.at'
    category = 'news, politics, nachrichten, Austria'
@@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
        for t in soup.findAll(['ul', 'li']):
            t.name = 'div'
        return soup

    def get_cover_url(self):
        highResolution = True

        date = strftime("%Y/%Y%m%d")
        # a past issue can also be requested, e.g.:
        #date = '2012/20120503'

        urlP1 = 'http://epaper.derstandarddigital.at/'
        urlP2 = 'data_ep/STAN/' + date
        urlP3 = '/V.B1/'
        urlP4 = 'paper.htm'
        urlHTML = urlP1 + urlP2 + urlP3 + urlP4

        br = self.clone_browser(self.browser)
        htmlF = br.open_novisit(urlHTML)
        htmlC = htmlF.read()

        # URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
        # consists of part2 + part3 + 'pages/' + code
        # 'pages/' has length 6, code has length 36

        index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6
        code = htmlC[index:index + 36]

        # URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
        # URL EXAMPLE LOW RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png

        urlPic = urlP1 + urlP2 + '/pagejpg/' + code

        if highResolution:
            urlPic = urlPic + '_b'

        urlPic = urlPic + '.png'

        return urlPic
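To make the offset arithmetic in get_cover_url concrete, here is the same slice applied to the sample path from the URL EXAMPLE comment (illustration only):

    s = 'data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm'
    # 26 chars of urlP2, 6 of urlP3 ('/V.B1/'), plus 6 for 'pages/' lands on the code
    start = s.find('data_ep/STAN/2012/20120504') + len('data_ep/STAN/2012/20120504' + '/V.B1/') + 6
    code = s[start:start + 36]
    # code == 'A3B6798F-2751-4D8D-A103-C5EF22F7ACBE'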
recipes/drytooling_pl.recipe (new file, 15 lines)
@@ -0,0 +1,15 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from calibre.web.feeds.news import BasicNewsRecipe

class BasicUserRecipe1337668045(BasicNewsRecipe):
    title = u'Drytooling.com.pl'
    masthead_url = 'http://drytooling.com.pl/images/drytooling-kindle.png'
    cover_url = 'http://drytooling.com.pl/images/drytooling-kindle.png'
    description = u'Drytooling.com.pl jest serwisem wspinaczki zimowej, alpinizmu i himalaizmu. Jeśli uwielbiasz zimę, nie możesz doczekać się aż wyciągniesz szpej z szafki i uderzysz w Tatry, Alpy, czy może Himalaje, to znajdziesz tutaj naprawdę dużo interesujących Cię treści! Zapraszamy!'
    __author__ = u'Damian Granowski'
    oldest_article = 100
    max_articles_per_feed = 20
    auto_cleanup = True

    feeds = [
        (u'Newsy', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=4&format=raw'),
        (u'Artyku\u0142y', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=3&format=raw'),
        (u'Imprezy i zawody', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=5&format=raw'),
        (u'Baza G\xf3rska', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=6&format=raw'),
        (u'Wyprawy', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=7&format=raw'),
        (u'Newsy / alpinizm', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=12&format=raw'),
        (u'Newsy / klasyka zimowa', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=11&format=raw'),
        (u'Newsy / himalaizm', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=10&format=raw'),
        (u'Outdoor', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=8&format=raw'),
        ]
recipes/economico.recipe (new file, 30 lines)
@@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe

class Economico(BasicNewsRecipe):
    title = u'Economico'
    language = 'pt'
    __author__ = 'Krittika Goyal'
    oldest_article = 1  # days
    max_articles_per_feed = 25
    encoding = 'utf-8'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('Ultima Hora', 'http://economico.sapo.pt/rss/ultimas'),
        ('Em Foco', 'http://economico.sapo.pt/rss/emfoco'),
        ('Mercados', 'http://economico.sapo.pt/rss/mercados'),
        ('Empresas', 'http://economico.sapo.pt/rss/empresas'),
        ('Economia', 'http://economico.sapo.pt/rss/economia'),
        ('Politica', 'http://economico.sapo.pt/rss/politica'),
        ]
@@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
    no_stylesheets = True
    language = 'ja'
    encoding = 'utf-8'
    feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
    index = 'http://japanese.engadget.com/'
    remove_javascript = True

    remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
    remove_tags_after = dict(name='div', attrs={'class':'post_body'})

    def parse_index(self):
        feeds = []
        newsarticles = []
        soup = self.index_to_soup(self.index)
        for topstories in soup.findAll('div',attrs={'class':'post_content'}):
            itt = topstories.find('h4')
            itema = itt.find('a',href=True)
            newsarticles.append({
                'title'       :itema.string
                ,'date'       :''
                ,'url'        :itema['href']
                ,'description':''
                })
        feeds.append(('Latest Posts', newsarticles))
        return feeds

    remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
    remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})
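Several recipes touched by this commit (Engadget Japan above, The Grid TO and the Mainichi recipes below) build their article lists in parse_index instead of relying on RSS. The contract is the same everywhere: return a list of (section_title, articles) pairs, where each article is a dict with title, url, date and description keys. A minimal sketch of the expected shape (hypothetical values):

    def parse_index(self):
        # every article dict needs 'title' and 'url'; 'date' and 'description' may be empty
        articles = [{
            'title'      : 'Example headline',
            'url'        : 'http://japanese.engadget.com/example-post/',
            'date'       : '',
            'description': '',
            }]
        return [('Latest Posts', articles)]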
recipes/folha.recipe (new file, 82 lines)
@@ -0,0 +1,82 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe

class Folha_de_s_paulo(BasicNewsRecipe):
    title = u'Folha de São Paulo - portal'
    __author__ = 'Darko Miletic'
    description = 'Um jornal a serviço do Brasil'
    publisher = 'Folhapress'
    category = 'news, politics, Brasil'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'pt_BR'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        img{margin-bottom: 0.4em; display:block}
        """

    conversion_options = {
        'comment'     : description
        , 'tags'      : category
        , 'publisher' : publisher
        , 'language'  : language
    }

    remove_tags = [dict(name=['meta','link','base','iframe','embed','object'])]
    keep_only_tags = [dict(attrs={'id':'articleNew'})]

    feeds = [
        (u'Poder'           , u'http://feeds.folha.uol.com.br/poder/rss091.xml')
        ,(u'Mundo'           , u'http://feeds.folha.uol.com.br/mundo/rss091.xml')
        ,(u'Mercado'         , u'http://feeds.folha.uol.com.br/mercado/rss091.xml')
        ,(u'Cotidiano'       , u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml')
        ,(u'Esporte'         , u'http://feeds.folha.uol.com.br/esporte/rss091.xml')
        ,(u'Ilustrada'       , u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml')
        ,(u'F5'              , u'http://feeds.folha.uol.com.br/f5/rss091.xml')
        ,(u'Ciência'         , u'http://feeds.folha.uol.com.br/ciencia/rss091.xml')
        ,(u'Tec'             , u'http://feeds.folha.uol.com.br/tec/rss091.xml')
        ,(u'Ambiente'        , u'http://feeds.folha.uol.com.br/ambiente/rss091.xml')
        ,(u'Bichos'          , u'http://feeds.folha.uol.com.br/bichos/rss091.xml')
        ,(u'Celebridades'    , u'http://feeds.folha.uol.com.br/celebridades/rss091.xml')
        ,(u'Comida'          , u'http://feeds.folha.uol.com.br/comida/rss091.xml')
        ,(u'Equilibrio'      , u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml')
        ,(u'Folhateen'       , u'http://feeds.folha.uol.com.br/folhateen/rss091.xml')
        ,(u'Folhinha'        , u'http://feeds.folha.uol.com.br/folhinha/rss091.xml')
        ,(u'Ilustrissima'    , u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml')
        ,(u'Saber'           , u'http://feeds.folha.uol.com.br/saber/rss091.xml')
        ,(u'Turismo'         , u'http://feeds.folha.uol.com.br/turismo/rss091.xml')
        ,(u'Painel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml')
        ,(u'Publifolha'      , u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml')
        ,(u'Em cima da hora' , u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml')
        ]

    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        curl = url.partition('/*')[2]
        return curl

    def print_version(self, url):
        return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.folha.uol.com.br/')
        cont = soup.find('div', attrs={'id':'newspaper'})
        if cont:
            ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
            if ai:
                return ai.img['src']
        return None
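get_article_url above unwraps feed-proxy entries by keeping whatever follows the '/*' marker in the guid. A quick illustration (the URL below is hypothetical, invented only to show the slicing):

    url = 'http://feeds.folha.uol.com.br/poder/rss091.xml/*http://www1.folha.uol.com.br/poder/1092546-example.shtml'
    url.partition('/*')[2]
    # -> 'http://www1.folha.uol.com.br/poder/1092546-example.shtml'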
@@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
class FolhaOnline(BasicNewsRecipe):
    THUMBALIZR_API = ''  # ----> Get yours at http://www.thumbalizr.com/ and put it here
    LANGUAGE = 'pt_br'
    language = 'pt'
    language = 'pt_BR'
    LANGHTM = 'pt-br'
    ENCODING = 'cp1252'
    ENCHTM = 'iso-8859-1'
@@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
    HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
    masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'

    language = 'pt'
    language = 'pt_BR'
    no_stylesheets = True
    max_articles_per_feed = 40
    remove_javascript = True
@@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from datetime import date
import re

class GN(BasicNewsRecipe):
    EDITION = 0

    __author__ = 'Piotr Kontek'
    title = u'Gość niedzielny'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    temp_files = []
    simultaneous_downloads = 1
    masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
    title = u'Gość niedzielny'

    articles_are_obfuscated = True

@@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
        self.temp_files[-1].close()
        return self.temp_files[-1].name

    def find_last_issue(self):
        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
        # look for the image and the link to the previous full issue
    def find_last_issue(self, year):
        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))

        # look for the image and the link to the previous full issue
        first = True
        for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
            img = d.find('img')
            if img != None:
                a = img.parent
                self.EDITION = a['href']
                self.title = img['alt']
                self.cover_url = 'http://www.gosc.pl' + img['src']
                if not first:
                if year != date.today().year or not first:
                    break
                first = False

    def parse_index(self):
        self.find_last_issue()
        year = date.today().year
        self.find_last_issue(year)
        # if this is the first issue of the year, the previous year must be fetched
        if self.EDITION == 0:
            self.find_last_issue(year-1)
        soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
        feeds = []
        # editorial
recipes/grid_to.recipe (new file, 79 lines)
@@ -0,0 +1,79 @@
from calibre.web.feeds.news import BasicNewsRecipe

class TheGridTO(BasicNewsRecipe):
    #: The title to use for the ebook
    title = u'The Grid TO'

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
                   'accessible voice for Toronto.')

    #: The author of this recipe
    __author__ = u'Yusuf W'

    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'

    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'

    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebook formats
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True

    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')
    remove_tags = [
        dict(name='div', attrs={'class':'right-content pull-right'}),
        dict(name='div', attrs={'class':'right-content'}),
        dict(name='div', attrs={'class':'ftr-line'}),
        dict(name='div', attrs={'class':'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags')
        ]

    #: Keep only the specified tags and their children.
    #keep_only_tags = [dict(name='div', id='content')]

    cover_margins = (0, 0, '#ffffff')

    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        soup = self.index_to_soup(self.INDEX)
        cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']
        return cover_url

    def parse_index(self):
        # Get the latest issue
        soup = self.index_to_soup(self.INDEX)
        a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]

        # Parse the index of the latest issue
        self.INDEX = self.INDEX + a['href']
        soup = self.index_to_soup(self.INDEX)

        feeds = []
        for section in ['city', 'life', 'culture']:
            section_class = 'left-content article-listing ' + section + ' pull-left'
            div = soup.find(attrs={'class': section_class})

            articles = []
            for tag in div.findAllNext(attrs={'class':'search-block'}):
                a = tag.findAll('a', href=True)[1]

                title = self.tag_to_string(a)
                url = a['href']

                articles.append({'title': title, 'url': url, 'description':'', 'date':''})

            feeds.append((section, articles))
        return feeds
recipes/heavy_metal_it.recipe (new file, 22 lines)
@@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1336289226(BasicNewsRecipe):
    title = u'Heavy Metal'
    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = False
    masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'
    feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')]
    keep_only_tags = [
        dict(name='div', attrs={'class':'entry'})
        ]
    remove_tags_after = [
        dict(name='div', attrs={'class':'sociable'})
        ]
    description = 'An Italian heavy metal magazine'
    __author__ = 'faber1971'
    language = 'it'

    __version__ = 'v1.0'
    __date__ = '6, May 2012'
recipes/icons/drytooling_pl.png (new binary file, 1007 B)
recipes/icons/folha.png (new binary file, 1.6 KiB)
recipes/icons/strategic_culture.png (new binary file, 648 B)
@@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
    top_url = 'http://www.jiji.com/'

    feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]

    remove_tags_before = dict(id="article-area")
    remove_tags_after = dict(id="ad_google")

    def get_cover_url(self):
recipes/juve_la_stampa.recipe (new file, 24 lines)
@@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1336504510(BasicNewsRecipe):
    title = u'Juve - La Stampa'
    oldest_article = 1
    language = 'it'
    max_articles_per_feed = 100
    auto_cleanup = True
    masthead_url = 'http://www3.lastampa.it/fileadmin/media/sport/quijuve/top_quijuve.jpg'
    feeds = [(u'Qui Juve - La Stampa', u'http://feed43.com/2352784107537677.xml')]
    remove_tags = [dict(name='div',attrs={'class':['article-toolbar', 'sezione sezione-news', 'intestazione']})]

    extra_css = '''
        div.dettaglio div.immagine_girata p.news-single-imgcaption {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400; line-height: 1.2; padding-bottom: 12px; text-transform: none; }
        .sezione {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400; line-height: 1.2; padding-bottom: 12px; text-transform: none; }
        body {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400; line-height: 1.2; padding-bottom: 12px; text-transform: none; }
        h3 {color: #000000; font-family: "Georgia", "Times", serif; font-size: 22px; font-weight: 400; line-height: 1.2; padding-bottom: 12px; text-transform: none; }
        div.dettaglio h2.catenaccio {color: #000000; font-family: "Georgia", "Times", serif; font-size: 18px; font-weight: 400; line-height: 1.2; padding-bottom: 12px; text-transform: none; }
        '''
    description = 'News about Juventus from La Stampa'
    __author__ = 'faber1971'

    __version__ = 'v1.0'
    __date__ = '8, May 2012'
@@ -1,7 +1,7 @@
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'

'''
http://www.repubblica.it/
@@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe

class LaRepubblica(BasicNewsRecipe):
    title = 'La Repubblica'
    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
    description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
    masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
    publisher = 'Gruppo editoriale L\'Espresso'
    category = 'News, politics, culture, economy, general interest'
    language = 'it'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 5
    oldest_article = 1
    encoding = 'utf8'
    use_embedded_content = False
    no_stylesheets = True
@@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
        dict(attrs={'class':'articolo'}),
        dict(attrs={'class':'body-text'}),
        dict(name='p', attrs={'class':'disclaimer clearfix'}),
        dict(name='div', attrs={'id':'main'}),
        dict(attrs={'id':'contA'})
        ]

@@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
        dict(name=['object','link','meta','iframe','embed']),
        dict(name='span',attrs={'class':'linkindice'}),
        dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
        dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
        dict(name='div', attrs={'class':'generalbox'}),
        dict(name='ul', attrs={'id':'hystory'})
@@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
        (u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
        (u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
        (u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
        (u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
        (u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
        (u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
        (u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
        (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
        (u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
        (u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
        (u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
        (u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
        (u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
        (u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
        ]

    def preprocess_html(self, soup):
@@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
    publisher = 'Mainichi Daily News'
    category = 'news, japan'
    language = 'ja'

    feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
    index = 'http://mainichi.jp/select/'
    remove_javascript = True
    masthead_title = u'MAINICHI DAILY NEWS'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags = [{'class':"RelatedArticle"}]
    remove_tags_after = {'class':"Credit"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):

@@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul',attrs={'class':'MaiLink'})
        if topstories:
            newsarticles = []
            for itt in topstories.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    newsarticles.append({
                        'title'       :itema.string
                        ,'date'       :''
                        ,'url'        :itema['href']
                        ,'description':''
                        })
            feeds.append(('latest', newsarticles))
        return feeds
recipes/mainichi_en.recipe (new file, 67 lines)
@@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class MainichiEnglishNews(BasicNewsRecipe):
    title = u'The Mainichi'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 40
    description = 'Japanese traditional newspaper Mainichi news in English'
    publisher = 'Mainichi News'
    category = 'news, japan'
    language = 'en_JP'
    index = 'http://mainichi.jp/english/english/index.html'
    remove_javascript = True
    masthead_url = 'http://mainichi.jp/english/images/themainichi.png'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        for section in soup.findAll('section'):
            newsarticles = []
            section_name = 'news'
            hds = section.find('div', attrs={'class':'CategoryHead clr'})
            if hds:
                section_item = hds.find('h1')
                if section_item:
                    section_name = section_item.find('a').string
            items = section.find('ul', attrs={'class':'MaiLink'})
            for item in items.findAll('li'):
                if item:
                    itema = item.find('a')
                    newsarticles.append({
                        'title'       :itema.string
                        ,'date'       :''
                        ,'url'        :itema['href']
                        ,'description':''
                        })
            feeds.append((section_name, newsarticles))
        return feeds
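The parse_feeds override above (repeated across the Mainichi recipes) filters out articles that point at the pheedo.jp / rssad.jp ad redirectors by collecting them in delList and splicing them out one by one. An equivalent, more compact form, shown only as a sketch (it assumes Feed.articles is a plain mutable list, as it is in calibre's feed model):

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            # keep only articles that do not go through an ad redirector
            curfeed.articles = [a for a in curfeed.articles
                                if 'pheedo.jp' not in a.url and 'rssad.jp' not in a.url]
        return feeds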
@@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re

class MainichiDailyITNews(BasicNewsRecipe):
    title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 100
    description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
    publisher = 'Mainichi Daily News'
    category = 'news, Japan, IT, Electronics'
    language = 'ja'

    feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags = [{'class':"RelatedArticle"}]
    remove_tags_after = {'class':"Credit"}

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'pheedo.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds
recipes/mainichi_science_news.recipe (new file, 59 lines)
@@ -0,0 +1,59 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class MainichiDailyScienceNews(BasicNewsRecipe):
    title = u'\u6bce\u65e5\u65b0\u805e(Science)'
    __author__ = 'Hiroshi Miura'
    oldest_article = 2
    max_articles_per_feed = 20
    description = 'Japanese traditional newspaper Mainichi Daily News - science'
    publisher = 'Mainichi Daily News'
    category = 'news, japan'
    language = 'ja'
    index = 'http://mainichi.jp/select/science'
    remove_javascript = True
    masthead_title = u'MAINICHI DAILY NEWS'

    remove_tags_before = {'class':"NewsTitle"}
    remove_tags_after = {'class':"NewsBody clr"}

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)

        for curfeed in feeds:
            delList = []
            for a,curarticle in enumerate(curfeed.articles):
                if re.search(r'rssad.jp', curarticle.url):
                    delList.append(curarticle)
            if len(delList)>0:
                for d in delList:
                    index = curfeed.articles.index(d)
                    curfeed.articles[index:index+1] = []

        return feeds

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul',attrs={'class':'MaiLink'})
        if topstories:
            newsarticles = []
            for itt in topstories.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    newsarticles.append({
                        'title'       :itema.string
                        ,'date'       :''
                        ,'url'        :itema['href']
                        ,'description':''
                        })
            feeds.append(('Science', newsarticles))
        return feeds
recipes/marine_corps_times.recipe (new file, 42 lines)
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe

class MarineCorpsTimes(BasicNewsRecipe):
    title = 'Marine Corps Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Marine Corps'
    language = 'en'
    publisher = 'MarineCorpsTimes.com'
    category = 'news, U.S. Marine Corps'
    tags = 'news, U.S. Marine Corps'
    cover_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    masthead_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.MarineCorpstimes.com/rss_news.php'),
        ('Benefits', 'http://www.MarineCorpstimes.com/rss_benefits.php'),
        ('Money', 'http://www.MarineCorpstimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.MarineCorpstimes.com/rss_careers.php'),
        ('Community', 'http://www.MarineCorpstimes.com/rss_community.php'),
        ('Off Duty', 'http://www.MarineCorpstimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.MarineCorpstimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.MarineCorpstimes.com/rss_guard.php'),
        ]
@@ -56,7 +56,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    encoding = 'utf-8'
    remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href']
    use_embedded_content = False
    extra_css = 'body{font-size:1em;padding:5px 0}body,a,h2{background-color:#fff;text-decoration:none;color:#000}#date,div.byline,p.article-image-caption .credits,.calibrenavbar{font-size:.5em}.article-box-fact.module-title,#date,div.byline{clear:both}.article-box-fact.module-title{margin:8px 0}.article-box-fact.module-title,h2{font-size:1.1em}h1.title{font-size:1.4em}h1.title,.article-body p,div.article-image-caption-2column,div.article-image-caption-3column,#date,div.byline{margin-bottom:.6em}div.article-box-fact div.subtitle,.article-box-fact.module-title,h1.title,p.article-image-caption{font-weight:700}div.column-1-3{margin-left:19px}div.column-1-2{display:inline}div.column-1-2,div.column-1-3{margin-right:7px}p.article-image-caption{font-size:.6em;margin-top:5px}p.article-image-caption,#date,div.byline{color:#616262}p.article-image-caption .credits{font-style:italic}div.article-image-caption{width:246px}div.article-image-caption-2column{width:373px}div.column-3{background-color:#eee;float:right;width:50%}div.column-3 module-title{border:1px solid #aaa}div.article-box-fact div.subtitle,.article-box-fact.module-title{color:#24763b}div.byline{border-top:2px solid #24763b}div.column-3,img,div.column-3,p.small,div.article-image-caption{margin:.5em}img,p.small,.column1,h2{border:0;padding:0}.column1,h1,h2{margin:0}'
    extra_css = 'body{font-size:1em;padding:5px 0}body,a,h2{background-color:#fff;text-decoration:none;color:#000}#date,div.byline,p.article-image-caption .credits,.calibrenavbar,.calibre5{font-size:.5em}.article-box-fact.module-title,#date,div.byline{clear:both}.article-box-fact{font-size:0.7em}.article-box-fact.module-title{margin:8px 0; font-size:0.8em}h2{font-size:1em}h1.title{font-size:1.4em}h1.title,.article-body p,div.article-image-caption-2column,div.article-image-caption-3column,#date,div.byline{margin-bottom:.6em}div.article-box-fact div.subtitle,.article-box-fact.module-title,h1.title,p.article-image-caption{font-weight:700}div.column-1-3{margin-left:19px}div.column-1-2{display:inline}div.column-1-2,div.column-1-3{margin-right:7px}p.article-image-caption{font-size:.6em;margin-top:5px}p.article-image-caption,#date,div.byline{color:#616262}p.article-image-caption .credits{font-style:italic}div.article-image-caption{width:246px}div.article-image-caption-2column{width:373px}div.column-3{background-color:#eee;float:right;width:50%}div.column-3 module-title{border:1px solid #aaa}div.article-box-fact div.subtitle,.article-box-fact.module-title{color:#24763b}div.byline{border-top:2px solid #24763b}div.column-3,img,div.column-3,p.small,div.article-image-caption{margin:.5em}img,p.small,.column1,h2,.calibre5,.calibrenavbar{border:0;padding:0}.column1,h1,h2,.calibrenavbar{margin:0}'

    preprocess_regexps = [
@@ -71,11 +71,11 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):

    remove_tags = [
        dict(name=['iframe','script','noscript','style']),
        dict(name='div', attrs={'class':[re.compile('column-[14]-5'),'col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)')]}),
        dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'sidebar',re.compile('^article-\d'),'comments','gallery-1']),
        dict(name='div', attrs={'class':['column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
        dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1']),
        dict(name='a', attrs={'name':'comments'}),
        #dict(name='div', attrs={'data-href'}),
        dict(name='img', attrs={'class':'top-line'}),
        dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}),
        dict(attrs={'style':re.compile('^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$'),'title':'volledig scherm'})]

    '''removed by before/after:
41 recipes/military_times.recipe Normal file
@@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe


class MilitaryTimes(BasicNewsRecipe):
    title = 'Military Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Military'
    language = 'en'
    publisher = 'MilitaryTimes.com'
    category = 'news, U.S. Military'
    tags = 'news, U.S. Military'
    cover_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    masthead_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
    oldest_article = 7 #days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.militarytimes.com/rss_news.php'),
        ('Benefits', 'http://www.militarytimes.com/rss_benefits.php'),
        ('Money', 'http://www.militarytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.militarytimes.com/rss_careers.php'),
        ('Community', 'http://www.militarytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.militarytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.militarytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.militarytimes.com/rss_guard.php'),
    ]
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'

@@ -7,77 +6,21 @@ __license__ = 'GPL v3'
www.canada.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.web.feeds.recipes import BasicNewsRecipe


class CanWestPaper(BasicNewsRecipe):

    # un-comment the following four lines for the Victoria Times Colonist
##    title = u'Victoria Times Colonist'
##    url_prefix = 'http://www.timescolonist.com'
##    description = u'News from Victoria, BC'
##    fp_tag = 'CAN_TC'

    # un-comment the following four lines for the Vancouver Province
##    title = u'Vancouver Province'
##    url_prefix = 'http://www.theprovince.com'
##    description = u'News from Vancouver, BC'
##    fp_tag = 'CAN_VP'

    # un-comment the following four lines for the Vancouver Sun
##    title = u'Vancouver Sun'
##    url_prefix = 'http://www.vancouversun.com'
##    description = u'News from Vancouver, BC'
##    fp_tag = 'CAN_VS'

    # un-comment the following four lines for the Edmonton Journal
##    title = u'Edmonton Journal'
##    url_prefix = 'http://www.edmontonjournal.com'
##    description = u'News from Edmonton, AB'
##    fp_tag = 'CAN_EJ'

    # un-comment the following four lines for the Calgary Herald
##    title = u'Calgary Herald'
##    url_prefix = 'http://www.calgaryherald.com'
##    description = u'News from Calgary, AB'
##    fp_tag = 'CAN_CH'

    # un-comment the following four lines for the Regina Leader-Post
##    title = u'Regina Leader-Post'
##    url_prefix = 'http://www.leaderpost.com'
##    description = u'News from Regina, SK'
##    fp_tag = ''

    # un-comment the following four lines for the Saskatoon Star-Phoenix
##    title = u'Saskatoon Star-Phoenix'
##    url_prefix = 'http://www.thestarphoenix.com'
##    description = u'News from Saskatoon, SK'
##    fp_tag = ''

    # un-comment the following four lines for the Windsor Star
##    title = u'Windsor Star'
##    url_prefix = 'http://www.windsorstar.com'
##    description = u'News from Windsor, ON'
##    fp_tag = 'CAN_'

    # un-comment the following four lines for the Ottawa Citizen
##    title = u'Ottawa Citizen'
##    url_prefix = 'http://www.ottawacitizen.com'
##    description = u'News from Ottawa, ON'
##    fp_tag = 'CAN_OC'

    # un-comment the following four lines for the Montreal Gazette
    # un-comment the following three lines for the Montreal Gazette
    title = u'Montreal Gazette'
    url_prefix = 'http://www.montrealgazette.com'
    description = u'News from Montreal, QC'
    fp_tag = 'CAN_MG'


    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
    auto_cleanup = True
    auto_cleanup_keep = '//*[@id="imageBox"]'
    timefmt = ' [%b %d]'
    extra_css = '''
                .timestamp { font-size:xx-small; display: block; }
@@ -87,135 +30,19 @@ class CanWestPaper(BasicNewsRecipe):
                .byline { font-size:xx-small; }
                #photocaption { font-size: small; font-style: italic }
                #photocredit { font-size: xx-small; }'''
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
    remove_tags = [{'class':'comments'},
        dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
        dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
        dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
        dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
        dict(name='div', attrs={'class':'rule_grey_solid'}),
        dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]


    def get_cover_url(self):
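        # Build the Newseum front-page image URL for today's date; if it is
        # not available, step back one day at a time, up to a week, before
        # giving up.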
        from datetime import timedelta, date
        if self.fp_tag=='':
            return None
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
        try:
            br.open(cover)
        except:
            while daysback<7:
                cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
                br = BasicNewsRecipe.get_browser()
                try:
                    br.open(cover)
                except:
                    daysback = daysback+1
                    continue
                break
        if daysback==7:
            self.log("\nCover unavailable")
            cover = None
        return cover

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
        fixed = re.sub("\x97","—",fixed)
        fixed = re.sub("&#x2019;","’",fixed)
        return fixed

    def massageNCXText(self, description):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&amp;' with '&'
            massaged = re.sub("&amp;","&", massaged)
            return self.fixChars(massaged)
        else:
            return description

    def populate_article_metadata(self, article, soup, first):
        if first:
            picdiv = soup.find('body').find('img')
            if picdiv is not None:
                self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
        xtitle = article.text_summary.strip()
        if len(xtitle) == 0:
            desc = soup.find('meta',attrs={'property':'og:description'})
            if desc is not None:
                article.summary = article.text_summary = desc['content']

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):
        return self.strip_anchors(soup)


    def parse_index(self):
        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
        feeds = [
            ('News',
             'http://rss.canada.com/get/?F297'),
            ('Sports',
             'http://rss.canada.com/get/?F299'),
            ('Entertainment',
             'http://rss.canada.com/get/?F7366'),
            ('Business',
             'http://rss.canada.com/get/?F6939'),
        ]

        articles = {}
        key = 'News'
        ans = ['News']

        # Find each instance of class="sectiontitle", class="featurecontent"
        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
            #self.log(" div class = %s" % divtag['class'])
            if divtag['class'].startswith('section_title'):
                # div contains section title
                if not divtag.h3:
                    continue
                key = self.tag_to_string(divtag.h3,False)
                ans.append(key)
                self.log("Section name %s" % key)
                continue
            # div contains article data
            h1tag = divtag.find('h1')
            if not h1tag:
                continue
            atag = h1tag.find('a',href=True)
            if not atag:
                continue
            url = self.url_prefix+'/news/todays-paper/'+atag['href']
            #self.log("Section %s" % key)
            #self.log("url %s" % url)
            title = self.tag_to_string(atag,False)
            #self.log("title %s" % title)
            pubdate = ''
            description = ''
            ptag = divtag.find('p');
            if ptag:
                description = self.tag_to_string(ptag,False)
                #self.log("description %s" % description)
            author = ''
            autag = divtag.find('h4')
            if autag:
                author = self.tag_to_string(autag,False)
                #self.log("author %s" % author)
            if not articles.has_key(key):
                articles[key] = []
            articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
22 recipes/nachdenkseiten.recipe Normal file
@@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe


class Nachdenkseiten(BasicNewsRecipe):
    title = u'Nachdenkseiten'
    __author__ = 'jrda'
    publisher = 'www.nachdenkseiten.de Albrecht Mueller und Dr. Wolfgang Lieb'
    description = 'NachDenkSeiten - Die kritische Website'
    category = 'news'
    oldest_article = 7
    use_embedded_content = False
    language = 'de'
    timefmt = ''
    max_articles_per_feed = 6
    no_stylesheets = True
    encoding = 'utf-8'
    remove_javascript = True
    keep_only_tags = [
        {'id':'content'}]

    feeds = [
        ('News', 'http://www.nachdenkseiten.de/?feed=rss2'),
    ]
21 recipes/national_geographic_es.recipe Normal file
@@ -0,0 +1,21 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336226255(BasicNewsRecipe):

    title = u'National Geographic'
    publisher = u'National Geographic'
    __author__ = 'Vakya'
    description = 'Revista National Geographic - Últimas noticias'
    language = 'es'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags_before = dict(name='p' , attrs={'class':['image']})
    remove_tags_after = dict(name='hr')
    feeds = [(u'Vida salvage', u'http://www.nationalgeographic.com.es/feeds/rss.html')]
16 recipes/national_geographic_it.recipe Normal file
@@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336226255(BasicNewsRecipe):
    title = u'National Geographic'
    __author__ = 'faber1971'
    description = 'Science magazine'
    language = 'it'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags = [dict(name='div',attrs={'class':'banner-abbonamenti'})]
    feeds = [(u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml')]
@@ -1,5 +1,4 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class NYTimes(BasicNewsRecipe):

@@ -11,22 +10,8 @@ class NYTimes(BasicNewsRecipe):
    needs_subscription = False

    no_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
        #dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
        #dict(name='form', attrs={'onsubmit':''}),
        dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
    ]

#    def preprocess_html(self, soup):
#        table = soup.find('table')
#        if table is not None:
#            table.extract()
#        return soup

    auto_cleanup = True
    auto_cleanup_keep = '//*[@class="npStoryPhoto npTxtPlain"]'


    #TO GET ARTICLE TOC
@@ -53,14 +38,14 @@ class NYTimes(BasicNewsRecipe):
                if current_section is not None and x.name == 'h5':
                    # Article found
                    title = self.tag_to_string(x)
                    a = x.find('a', href=lambda x: x and 'story' in x)
                    a = x.find('a', href=True)
                    if a is None:
                        continue
                    url = a.get('href', False)
                    if not url or not title:
                        continue
                    #if url.startswith('story'):
                    url = 'http://www.nationalpost.com/todays-paper/'+url
                    #url = 'http://www.nationalpost.com/todays-paper/'+url
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    current_articles.append({'title': title, 'url':url,
@@ -70,11 +55,4 @@ class NYTimes(BasicNewsRecipe):
            feeds.append((current_section, current_articles))

        return feeds
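    # Graft the main npContentMain story <div> into a bare HTML skeleton so
    # that everything else on the page is discarded.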
    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'npContentMain'})
        ##td = heading.findParent(name='td')
        ##td.extract()
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
42 recipes/navy_times.recipe Normal file
@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe

class NavyTimes(BasicNewsRecipe):
    title = 'Navy Times'
    __author__ = 'jde'
    __date__ = '16 May 2012'
    __version__ = '1.0'
    description = 'News of the U.S. Navy'
    language = 'en'
    publisher = 'NavyTimes.com'
    category = 'news, U.S. Navy'
    tags = 'news, U.S. Navy'
    cover_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    masthead_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
    oldest_article = 7 #days
    max_articles_per_feed = 25
    publication_type = 'newspaper'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = True

    feeds = [
        ('News', 'http://www.navytimes.com/rss_news.php'),
        ('Benefits', 'http://www.navytimes.com/rss_benefits.php'),
        ('Money', 'http://www.navytimes.com/rss_money.php'),
        ('Careers & Education', 'http://www.navytimes.com/rss_careers.php'),
        ('Community', 'http://www.navytimes.com/rss_community.php'),
        ('Off Duty', 'http://www.navytimes.com/rss_off_duty.php'),
        ('Entertainment', 'http://www.navytimes.com/rss_entertainment.php'),
        ('Guard & Reserve', 'http://www.navytimes.com/rss_guard.php'),
    ]
20 recipes/news_busters.recipe Normal file
@@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe


class NewsBusters(BasicNewsRecipe):
    title = u'News Busters'
    description = 'Exposing and Combating Liberal Media Bias'
    __author__ = 'jde'
    oldest_article = 1 #day
    max_articles_per_feed = 100
    cover_url = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png"
    language = 'en'
    encoding = 'utf8'
    needs_subscription = False
    remove_javascript = True
    recursions = 0
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [(u'Blog', u'http://www.newsbusters.org/rss.xml')]
@@ -102,7 +102,7 @@ class Newsweek(BasicNewsRecipe):
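        # If the requested back issue is not on this year's list of editions,
        # subtract what was found and retry against the previous year's archive.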
        if len(options) > self.BACK_ISSUES:
            option = options[self.BACK_ISSUES];
            self.EDITION = option['value'].replace('http://www.newsweek.pl/wydania/','')
            issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
            self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
        else:
            self.BACK_ISSUES = self.BACK_ISSUES - len(options)
            self.YEAR = self.YEAR - 1
@@ -9,10 +9,10 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe

class Pescanik(BasicNewsRecipe):
    title = 'Peščanik'
    title = u'Peščanik'
    __author__ = 'Darko Miletic'
    description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
    publisher = 'Peščanik'
    description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
    publisher = u'Peščanik'
    category = 'news, politics, Serbia'
    oldest_article = 10
    max_articles_per_feed = 100
@@ -1,5 +1,5 @@
"""
Pocket Calibre Recipe v1.0
Pocket Calibre Recipe v1.2
"""
__license__ = 'GPL v3'
__copyright__ = '''
@@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
            articles = []
            soup = self.index_to_soup(feedurl)
            ritem = soup.find('ul', attrs={'id':'list'})
            if ritem is None:
                self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
                continue
            for item in reversed(ritem.findAll('li')):
                if articlesToGrab < 1:
                    break
@@ -94,7 +97,12 @@ class Pocket(BasicNewsRecipe):
                self.readList.append(readLink)
            totalfeeds.append((feedtitle, articles))
        if len(self.readList) < self.minimum_articles:
            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
            self.mark_as_read_after_dl = False
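            # Prefer abort_recipe_processing when this calibre version provides
            # it; otherwise log and return no feeds so the download still exits
            # cleanly.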
            if hasattr(self, 'abort_recipe_processing'):
                self.abort_recipe_processing("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
            else:
                self.log.exception("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
                return []
        return totalfeeds

    def mark_as_read(self, markList):
34 recipes/rebelion.recipe Normal file
@@ -0,0 +1,34 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe
import re


class RebelionRecipe (BasicNewsRecipe):
    __author__ = u'Marc Busqué <marc@lamarciana.com>' #Thanks to atlantique http://www.mobileread.com/forums/member.php?u=67876
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0'
    __license__ = 'GPL v3'
    __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
    title = u'Rebelion.org'
    description = u'Rebelión pretende ser un medio de información alternativa que publique las noticias que no son consideradas importantes por los medios de comunicación tradicionales. También, dar a las noticias un tratamiento diferente en la línea de mostrar los intereses que los poderes económicos y políticos del mundo capitalista ocultan para mantener sus privilegios y el status actual. Queremos servir y ayudarnos de todos los grupos, colectivos y personas que trabajan por cambiar este mundo en una perspectiva radicalmente diferente, más justa, igualitaria y equilibrada social y ecológicamente. Es nuestro objetivo contar con la participación y colaboración de todos vosotros para que Rebelión sea un espacio serio, riguroso y actualizado en la difusión de noticias.'
    url = 'http://www.rebelion.org'
    language = 'es'
    tags = 'contrainformación, información alternativa'
    oldest_article = 1
    remove_empty_feeds = True
    encoding = 'latin1'
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'CuerpoNoticia'}}
    ]
    no_stylesheets = True
    extra_css = '.autor {font-style: italic;} .titulo {font-size: 150%;} .titulo, .pretitulo {text-align: center;} #TextoNoticia {text-align:justify;} .autor, .fuente, .entradilla {font-size: 90%; text-align: left;}'

    feeds = [
        (u'Titulares del día', u'http://www.rebelion.org/rss_portada.php'),
    ]

    #See http://www.mobileread.com/forums/showthread.php?t=174501
    def print_version(self, url):
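        # The article id is the trailing run of digits in the URL; use it to
        # build the single-page print view.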
        id = re.compile('\d*$').search(url).group()
        return u'http://www.rebelion.org/noticia.php?id=%s' % id
22 recipes/revista_summa.recipe Normal file
@@ -0,0 +1,22 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1336226255(BasicNewsRecipe):

    title = u'Revista Summa'
    publisher = u'Summa'
    __author__ = 'Vakya'
    description = 'Informacion regional sobre economia y negocios'
    language = 'es'

    oldest_article = 15
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='label')
    feeds = [(u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss')]
61 recipes/shortlist.recipe Normal file
@@ -0,0 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1324663493(BasicNewsRecipe):
    title = u'Shortlist'
    description = 'Articles From Shortlist.com'
    # I've set oldest article to 7 days as the website updates weekly
    oldest_article = 7
    max_articles_per_feed = 12
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    __author__ = 'Dave Asbury'
    # last updated 19/5/12
    language = 'en_GB'

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.shortlist.com')
        cov = soup.find(attrs={'width' : '121'})
        #print '******** ',cov,' ***'
        #cover_url = 'http://www.shortlist.com'+cov['src']
        cover_url = cov['src']
        return cover_url

    masthead_url = 'http://www.mediauk.com/logos/100/344096.png'

    preprocess_regexps = [
        (re.compile(r'…or.*?email to your friends</a>.', re.IGNORECASE | re.DOTALL), lambda match: '')]

    keep_only_tags = [
        #dict(name='h1'),
        dict(name='h2',attrs={'class' : 'title'}),
        dict(name='h3',attrs={'class' : 'subheading'}),
        dict(attrs={'class' : [ 'hero-static','stand-first']}),
        dict(attrs={'class' : 'hero-image'}),
        dict(name='div',attrs={'id' : ['list','article','article alternate']}),
        dict(name='div',attrs={'class' : 'stand-first'}),
    ]
    remove_tags = [dict(name='h2',attrs={'class' : 'graphic-header'}),
        dict(attrs={'id' : ['share','twitter','facebook','digg','delicious','facebook-like']}),
        dict(attrs={'class' : ['related-content','related-content-item','related-content horizontal','more']}),
    ]

    remove_tags_after = [dict(name='p',attrs={'id' : 'tags'})
    ]

    feeds = [
        (u'Home carousel',u'http://feed43.com/7106317222455380.xml'),
        (u'This Weeks Issue', u'http://feed43.com/0323588208751786.xml'),
        (u'Cool Stuff',u'http://feed43.com/6253845228768456.xml'),
        (u'Style',u'http://feed43.com/7217107577215678.xml'),
        (u'Films',u'http://feed43.com/3101308515277265.xml'),
        (u'Music',u'http://feed43.com/2416400550560162.xml'),
        (u'TV',u'http://feed43.com/4781172470717123.xml'),
        (u'Sport',u'http://feed43.com/5303151885853308.xml'),
        (u'Gaming',u'http://feed43.com/8883764600355347.xml'),
        (u'Women',u'http://feed43.com/2648221746514241.xml'),
        (u'Instant Improver', u'http://feed43.com/1236541026275417.xml'),

        #(u'Articles', u'http://feed43.com/3428534448355545.xml')
    ]
@@ -1,3 +1,4 @@

__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
@@ -15,6 +16,8 @@ class Spiegel_int(BasicNewsRecipe):
    language = 'en_DE'
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = True
    auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
    encoding = 'cp1252'
    publisher = 'SPIEGEL ONLINE GmbH'
    category = 'news, politics, Germany'
@@ -43,25 +46,25 @@ class Spiegel_int(BasicNewsRecipe):
                     .spPhotoGallery{font-size:x-small; color:#990000 ;}
                     '''

    keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
    remove_tags_after = dict(attrs={'id':'spArticleBody'})
    remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
    remove_attributes = ['clear']
    #keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
    #remove_tags_after = dict(attrs={'id':'spArticleBody'})
    #remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
    #remove_attributes = ['clear']
    feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]

    def print_version(self, url):
        main, sep, rest = url.rpartition(',')
        rmain, rsep, rrest = main.rpartition(',')
        return rmain + ',druck-' + rrest + ',' + rest
    #def print_version(self, url):
        #main, sep, rest = url.rpartition(',')
        #rmain, rsep, rrest = main.rpartition(',')
        #return rmain + ',druck-' + rrest + ',' + rest

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                str = self.tag_to_string(item)
                item.replaceWith(str)
        return soup
    #def preprocess_html(self, soup):
        #for item in soup.findAll(style=True):
            #del item['style']
        #for item in soup.findAll('a'):
            #if item.string is not None:
                #str = item.string
                #item.replaceWith(str)
            #else:
                #str = self.tag_to_string(item)
                #item.replaceWith(str)
        #return soup
@@ -6,7 +6,6 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
spiegel.de
'''

from time import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Spiegel_ger(BasicNewsRecipe):
@@ -21,6 +20,8 @@ class Spiegel_ger(BasicNewsRecipe):
    lang = 'de-DE'
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = True
    auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
    encoding = 'cp1252'

    conversion_options = {
@@ -31,20 +32,9 @@ class Spiegel_ger(BasicNewsRecipe):
    }


    keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]

    remove_tags = [dict(name=['object','link','base','iframe'])]

    remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})

    feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]

    def print_version(self, url):
        rmt = url.rpartition('#')[0]
        main, sep, rest = rmt.rpartition(',')
        rmain, rsep, rrest = main.rpartition(',')
        purl = rmain + ',druck-' + rrest + ',' + rest
        return purl

    def get_cover_url(self):
        return 'http://wissen.spiegel.de/wissen/titel/SP/' + strftime("%Y/%W/%j/titel.jpg")
39 recipes/stars_and_stripes.recipe Normal file
@@ -0,0 +1,39 @@
|
||||
''' Stars and Stripes
|
||||
'''
|
||||
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class AdvancedUserRecipe1308791026(BasicNewsRecipe):
|
||||
title = u'Stars and Stripes'
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 100
|
||||
__author__ = 'adoucette'
|
||||
description = 'The U.S. militarys independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East.'
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
publisher = 'stripes.com'
|
||||
category = 'news, US, world'
|
||||
language = 'en'
|
||||
publication_type = 'newsportal'
|
||||
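    # Strip HTML comments from the raw page before it is parsed.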
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
    conversion_options = {
        'comments'         : description
        ,'tags'            : category
        ,'language'        : language
        ,'publisher'       : publisher
        ,'linearize_tables': True
    }
    keep_only_tags = [dict(name='div', attrs={'class':['element article']})]
    remove_tags_after = [dict(name='ul', attrs={'class':'inline-bookmarks'})]
    feeds = [
        (u'News', u'http://feeds.stripes.com/starsandstripes/news'),
        (u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'),
        (u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'),
        (u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'),
        (u'Travel', u'http://feeds.stripes.com/starsandstripes/travel')
    ]
92 recipes/strategic_culture.recipe Normal file
@@ -0,0 +1,92 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'

'''
www.strategic-culture.org
'''

import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe

class StrategicCulture(BasicNewsRecipe):
    title = 'Strategic Culture Foundation'
    __author__ = 'Darko Miletic'
    description = 'Online Journal'
    publisher = 'Strategic Culture Foundation'
    category = 'news, politics'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en'
    publication_type = 'newsportal'
    masthead_url = 'http://www.strategic-culture.org/img/logo.jpg'
    extra_css = '''
                 body{font-family: Arial, sans-serif}
                 h1{font-family: "Times New Roman",Times,serif}
                 img{margin-bottom: 0.8em}
                '''

    conversion_options = {
        'comment'   : description
      , 'tags'      : category
      , 'publisher' : publisher
      , 'language'  : language
    }

    keep_only_tags = [
        dict(name=['h1','p'])
       ,dict(name='div', attrs={'id':'cke_pastebin'})
    ]

    remove_tags = [dict(name=['object','link','base','meta','iframe'])]

    feeds = [
        (u'News'             , u'http://www.strategic-culture.org/blocks/news.html'                )
       ,(u'Politics'         , u'http://www.strategic-culture.org/rubrics/politics.html'           )
       ,(u'Economics'        , u'http://www.strategic-culture.org/rubrics/economics.html'          )
       ,(u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html')
       ,(u'Columnists'       , u'http://www.strategic-culture.org/rubrics/columnists.html'         )
    ]

    def print_version(self, url):
        return url.replace('-culture.org/news/','-culture.org/pview/')

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            if feedurl.endswith('news.html'):
                clname = 'sini14'
            else:
                clname = 'h22'
            checker = []
            for item in soup.findAll('a', attrs={'class':clname}):
                atag = item
                url = atag['href']
                title = self.tag_to_string(atag)
                description = ''
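                # Article URLs embed the date as .../YYYY/MM/DD/slug: peel the
                # path apart and rebuild an RFC 2822 style timestamp from it.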
                daypart = url.rpartition('/')[0]
                mpart,sep,day = daypart.rpartition('/')
                ypart,sep,month = mpart.rpartition('/')
                year = ypart.rpartition('/')[2]
                date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y"))
                if url not in checker:
                    checker.append(url)
                    articles.append({
                        'title'       :title
                       ,'date'        :date
                       ,'url'         :url
                       ,'description' :description
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
@@ -1,8 +1,9 @@

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2012, mkydgr'
'''
www.wired.com
based on the (broken) built-in recipe by Darko Miletic <darko.miletic at gmail.com>
'''

import re
@@ -11,11 +12,11 @@ from calibre.web.feeds.news import BasicNewsRecipe

class Wired(BasicNewsRecipe):
    title = 'Wired Magazine'
    __author__ = 'Darko Miletic'
    description = 'Gaming news'
    __author__ = 'mkydgr'
    description = 'Technology News'
    publisher = 'Conde Nast Digital'
    category = 'news, games, IT, gadgets'
    oldest_article = 32
    category = ''
    oldest_article = 500
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
@@ -25,7 +26,8 @@ class Wired(BasicNewsRecipe):
    language = 'en'
    publication_type = 'magazine'
    extra_css = ' body{font-family: Arial,Verdana,sans-serif} .entryDescription li {display: inline; list-style-type: none} '
    index = 'http://www.wired.com/magazine/'
    index = 'http://www.wired.com/magazine'
    departments = ['features','start','test','play','found', 'reviews']

    preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
    conversion_options = {
@@ -38,80 +40,53 @@ class Wired(BasicNewsRecipe):
    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
    remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
    remove_tags = [
        dict(name=['object','embed','iframe','link','meta','base'])
        dict(name=['object','embed','iframe','link'])
        ,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
        ,dict(attrs={'id':'ff_bottom_nav'})
        ,dict(name='a',attrs={'href':'http://www.wired.com/app'})
    ]
    remove_attributes = ['height','width','lang','border','clear']
    remove_attributes = ['height','width']


    def parse_index(self):
        totalfeeds = []

        soup = self.index_to_soup(self.index)
        majorf = soup.find('div',attrs={'class':'index'})
        if majorf:
            pfarticles = []
            firsta = majorf.find(attrs={'class':'spread-header'})
            if firsta:
                pfarticles.append({
                    'title'       :self.tag_to_string(firsta.a)
                   ,'date'        :strftime(self.timefmt)
                   ,'url'         :'http://www.wired.com' + firsta.a['href']
                   ,'description' :''
                })
            for itt in majorf.findAll('li'):
                itema = itt.find('a',href=True)
                if itema:
                    pfarticles.append({
                        'title'       :self.tag_to_string(itema)
                       ,'date'        :strftime(self.timefmt)
                       ,'url'         :'http://www.wired.com' + itema['href']
                       ,'description' :''
                    })
            totalfeeds.append(('Cover', pfarticles))
        features = soup.find('div',attrs={'id':'my-glider'})
        if features:
            farticles = []
            for item in features.findAll('div',attrs={'class':'section'}):
                divurl = item.find('div',attrs={'class':'feature-header'})
                if divurl:
                    divdesc = item.find('div',attrs={'class':'feature-text'})
                    url = divurl.a['href']
                    if not divurl.a['href'].startswith('http://www.wired.com'):
                        url = 'http://www.wired.com' + divurl.a['href']
                    title = self.tag_to_string(divurl.a)
                    description = self.tag_to_string(divdesc)
                    date = strftime(self.timefmt)
                    farticles.append({
                        'title'       :title
                       ,'date'        :date
                       ,'url'         :url
                       ,'description' :description
                    })
            totalfeeds.append(('Featured Articles', farticles))

        #department feeds
        departments = ['rants','start','test','play','found']
        dept = soup.find('div',attrs={'id':'magazine-departments'})
        if dept:
            for ditem in departments:
        depts = soup.find('div',attrs={'id':'department-posts'})

        if depts:
            for ditem in self.departments:
                darticles = []
                department = dept.find('div',attrs={'id':'department-'+ditem})
                department = depts.find('h3',attrs={'id':'department-'+ditem})
                if department:
                    for item in department.findAll('div'):
                        description = ''
                        feed_link = item.find('a')
                        if feed_link and feed_link.has_key('href'):
                            url = feed_link['href']
                            title = self.tag_to_string(feed_link)
                            date = strftime(self.timefmt)
                            darticles.append({
                                'title'       :title
                               ,'date'        :date
                               ,'url'         :url
                               ,'description' :description
                            })
                    #print '\n###### Found department %s ########'%(ditem)
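                    # Walk the siblings that follow the department's <h3> until
                    # the next <h3>, collecting article links from each <ul>.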
                    el = department.next
                    while el and (el.__class__.__name__ == 'NavigableString' or el.name != 'h3'):
                        if el.__class__.__name__ != 'NavigableString':
                            #print '\t ... element',el.name
                            if el.name == 'ul':
                                for artitem in el.findAll('li'):
                                    #print '\t\t ... article',repr(artitem)
                                    feed_link = artitem.find('a')
                                    #print '\t\t\t ... link',repr(feed_link)
                                    if feed_link and feed_link.has_key('href'):
                                        url = self.makeurl(feed_link['href'])
                                        title = self.tag_to_string(feed_link)
                                        date = strftime(self.timefmt)
                                        #print '\t\t ... found "%s" %s'%(title,url)
                                        darticles.append({
                                            'title'       :title
                                           ,'date'        :date
                                           ,'url'         :url
                                           ,'description' :''
                                        })
                                el = None
                        else:
                            el = el.next

                totalfeeds.append((ditem.capitalize(), darticles))
        return totalfeeds
@@ -120,7 +95,7 @@ class Wired(BasicNewsRecipe):
        soup = self.index_to_soup(self.index)
        cover_item = soup.find('div',attrs={'class':'spread-image'})
        if cover_item:
            cover_url = 'http://www.wired.com' + cover_item.a.img['src']
            cover_url = self.makeurl(cover_item.a.img['src'])
        return cover_url

    def print_version(self, url):
@@ -129,17 +104,10 @@ class Wired(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            if item.string is not None:
                tstr = item.string
                item.replaceWith(tstr)
            else:
                item.name='span'
                for atrs in ['href','target','alt','title','name','id']:
                    if item.has_key(atrs):
                        del item[atrs]
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup

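    # Normalize links: make relative URLs absolute on www.wired.com and
    # collapse doubled trailing slashes.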
    def makeurl(self, addr):
        if addr[:4] != 'http' : addr='http://www.wired.com' + addr
        while addr[-2:] == '//' : addr=addr[:-1]
        return addr
BIN resources/compiled_coffeescript.zip Normal file (binary file not shown)
@@ -490,12 +490,6 @@ save_original_format = True
# how many should be shown, here.
gui_view_history_size = 15

#: When using the 'Tweak Book' action, which format to prefer
# When tweaking a book that has multiple formats, calibre picks one
# automatically. By default EPUB is preferred to HTMLZ. If you would like to
# prefer HTMLZ to EPUB for tweaking, change this to 'htmlz'
tweak_book_prefer = 'epub'

#: Change the font size of book details in the interface
# Change the font size at which book details are rendered in the side panel and
# comments are rendered in the metadata edit dialog. Set it to a positive or
@@ -512,3 +506,17 @@ change_book_details_font_size_by = 0
# No compile: compile_gpm_templates = False
compile_gpm_templates = True

#: What format to default to when using the Tweak feature
# The Tweak feature of calibre allows direct editing of a book format.
# If multiple formats are available, calibre will offer you a choice
# of formats, defaulting to your preferred output format if it is available.
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
# to that format rather than your output format preference.
# Set to a value of 'remember' to use whichever format you chose last time you
# used the Tweak feature.
# Examples:
#   default_tweak_format = None (Use output format)
#   default_tweak_format = 'EPUB'
#   default_tweak_format = 'remember'
default_tweak_format = None
37 session.vim
@@ -1,30 +1,17 @@
" Project wide builtins
let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext"

python << EOFPY
import os, sys
fun! CalibreLog()
    " Setup buffers to edit the calibre changelog and version info prior to
    " making a release.
    enew
    read ! bzr log -l 500
    set nomodifiable noswapfile buftype=nofile
    edit Changelog.yaml
    edit src/calibre/constants.py
endfun

import vipy
nnoremap \log :call CalibreLog()<CR>

source_file = vipy.vipy.eval('expand("<sfile>")')
project_dir = os.path.dirname(source_file)
src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
base_dir = os.path.join(src_dir, 'calibre')

sys.path.insert(0, src_dir)
sys.resources_location = os.path.join(project_dir, 'resources')
sys.extensions_location = os.path.join(base_dir, 'plugins')
sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')

vipy.session.initialize(project_name='calibre', src_dir=src_dir,
        project_dir=project_dir, base_dir=project_dir)

def recipe_title_callback(raw):
    return eval(raw.decode('utf-8')).replace(' ', '_')

vipy.session.add_content_browser('<leader>r', 'Recipe',
        vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
        vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY

nmap \log :enew<CR>:read ! bzr log -l 500 <CR>:e Changelog.yaml<CR>:e src/calibre/constants.py<CR>
python import init_calibre
python import calibre
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, socket, struct, subprocess
import os, socket, struct, subprocess, glob
from distutils.spawn import find_executable

from PyQt4 import pyqtconfig
@@ -120,7 +120,7 @@ if iswindows:
    poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir)
    popplerqt4_lib_dirs = poppler_lib_dirs
    poppler_libs = ['poppler']
    magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.6.6')]
    magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.7.6')]
    magick_lib_dirs = [os.path.join(magick_inc_dirs[0], 'VisualMagick', 'lib')]
    magick_libs = ['CORE_RL_wand_', 'CORE_RL_magick_']
    podofo_inc = os.path.join(sw_inc_dir, 'podofo')
@@ -128,8 +128,9 @@ if iswindows:
elif isosx:
    fc_inc = '/sw/include/fontconfig'
    fc_lib = '/sw/lib'
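    # Pick up whichever poppler source tree is present under /sw/build rather
    # than hard-coding a version number.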
    poppler = glob.glob('/sw/build/poppler-*')[-1]
    poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
            '/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5')
            '{0}/poppler:{0}'.format(poppler))
    poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
            '/sw/lib')
    poppler_libs = ['poppler']
@@ -191,6 +192,9 @@ else:
    lh = os.path.join(poppler_inc_dirs[0], 'Link.h')
    if 'class AnnotLink' not in open(lh, 'rb').read():
        poppler_cflags.append('-DPOPPLER_OLD_LINK_TYPE')
    ph = os.path.join(poppler_inc_dirs[0], 'Page.h')
    if 'getLinks(Catalog' in open(ph, 'rb').read():
        poppler_cflags.append('-DPOPPLER_PRE_20')

magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
@@ -22,7 +22,8 @@ Do not modify it unless you know what you are doing.
import sys, os

path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r})
sys.path.insert(0, path)
if path not in sys.path:
    sys.path.insert(0, path)

sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r})
sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r})
@@ -32,7 +32,7 @@ binary_includes = [
    '/lib/libz.so.1',
    '/usr/lib/libtiff.so.5',
    '/lib/libbz2.so.1',
    '/usr/lib/libpoppler.so.7',
    '/usr/lib/libpoppler.so.25',
    '/usr/lib/libxml2.so.2',
    '/usr/lib/libopenjpeg.so.2',
    '/usr/lib/libxslt.so.1',
@@ -41,8 +41,8 @@ binary_includes = [
    '/usr/lib/libgthread-2.0.so.0',
    '/usr/lib/libpng14.so.14',
    '/usr/lib/libexslt.so.0',
    MAGICK_PREFIX+'/lib/libMagickWand.so.4',
    MAGICK_PREFIX+'/lib/libMagickCore.so.4',
    MAGICK_PREFIX+'/lib/libMagickWand.so.5',
    MAGICK_PREFIX+'/lib/libMagickCore.so.5',
    '/usr/lib/libgcrypt.so.11',
    '/usr/lib/libgpg-error.so.0',
    '/usr/lib/libphonon.so.4',
@@ -385,7 +385,7 @@ class Py2App(object):
    @flush
    def add_poppler(self):
        info('\nAdding poppler')
        for x in ('libpoppler.7.dylib',):
        for x in ('libpoppler.25.dylib',):
            self.install_dylib(os.path.join(SW, 'lib', x))
        self.install_dylib(os.path.join(SW, 'bin', 'pdftohtml'), False)
@@ -429,7 +429,7 @@ class Py2App(object):
    def add_imagemagick(self):
        info('\nAdding ImageMagick')
        for x in ('Wand', 'Core'):
            self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.4.dylib'%x))
            self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.5.dylib'%x))
        idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1]
        dest = os.path.join(self.frameworks_dir, 'ImageMagick')
        if os.path.exists(dest):
@@ -18,7 +18,7 @@ QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6',
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
        'VisualMagick', 'bin')
CRT = r'C:\Microsoft.VC90.CRT'
@@ -295,7 +295,7 @@ NOTE: poppler must be built as a static library, unless you build the qt4 bindings

Now do the same for the pdftohtml project

cp poppler/*.h ~/sw/include/poppler && cp goo/*.h ~/sw/include/poppler/goo && cp splash/*.h ~/sw/include/poppler/splash && cp build/Release/poppler.lib ../../lib/ && cp build/utils/Release/*.exe ../../bin/
cp poppler/*.h ~/sw/include/poppler && cp goo/*.h ~/sw/include/poppler/goo && cp splash/*.h ~/sw/include/poppler/splash && cp build/Release/poppler.lib ../../lib/ && cp build/utils/Release/pdftohtml.exe ../../bin/


podofo
@@ -336,6 +336,8 @@ Index: src/PdfFiltersPrivate.cpp
ImageMagick
--------------

Get the source from: http://www.imagemagick.org/download/windows/ImageMagick-windows.zip

Edit VisualMagick/configure/configure.cpp to set

    int projectType = MULTITHREADEDDLL;
@@ -349,7 +351,10 @@ Edit magick/magick-config.h
Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE

Now open VisualMagick/VisualDynamicMT.sln set to Release
Remove the CORE_xlib and UTIL_Imdisplay project CORE_Magick++
Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.

F7 for build project, you will get one error due to the removal of xlib, ignore
it.

calibre
---------
@@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-12 09:56+0000\n"
"PO-Revision-Date: 2012-05-03 16:09+0000\n"
"Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-13 05:26+0000\n"
"X-Generator: Launchpad (build 15070)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: ca\n"

#. name for aaa
@@ -9536,7 +9536,7 @@ msgstr "Ani"

#. name for hni
msgid "Hani"
msgstr ""
msgstr "Haní"

#. name for hnj
msgid "Hmong Njua"
@@ -9544,7 +9544,7 @@ msgstr "Miao; Hmong Njua"

#. name for hnn
msgid "Hanunoo"
msgstr ""
msgstr "Hanunoo"

#. name for hno
msgid "Hindko; Northern"
@@ -9552,35 +9552,35 @@ msgstr "Hindko; septentrional"

#. name for hns
msgid "Hindustani; Caribbean"
msgstr ""
msgstr "Hindustaní; Caribeny"

#. name for hnu
msgid "Hung"
msgstr ""
msgstr "Hung"

#. name for hoa
msgid "Hoava"
msgstr ""
msgstr "Hoava"

#. name for hob
msgid "Mari (Madang Province)"
msgstr ""
msgstr "Mari (Província de Madang)"

#. name for hoc
msgid "Ho"
msgstr ""
msgstr "Ho"

#. name for hod
msgid "Holma"
msgstr ""
msgstr "Holma"

#. name for hoe
msgid "Horom"
msgstr ""
msgstr "Horom"

#. name for hoh
msgid "Hobyót"
msgstr ""
msgstr "Hobyot"

#. name for hoi
msgid "Holikachuk"
@@ -9588,11 +9588,11 @@ msgstr "Holikachuk"

#. name for hoj
msgid "Hadothi"
msgstr "Hadothi"
msgstr "Harautí"

#. name for hol
msgid "Holu"
msgstr "Holu"
msgstr "Holo"

#. name for hom
msgid "Homa"
@@ -9628,11 +9628,11 @@ msgstr "Honi"

#. name for hoy
msgid "Holiya"
msgstr ""
msgstr "Holiya"

#. name for hoz
msgid "Hozo"
msgstr ""
msgstr "Hozo"

#. name for hpo
msgid "Hpon"
@@ -9644,7 +9644,7 @@ msgstr "Hawaià Pidgin; llenguatge de signes"

#. name for hra
msgid "Hrangkhol"
msgstr "Hrangkhol"
msgstr "Hrangkol"

#. name for hre
msgid "Hre"
@@ -9668,7 +9668,7 @@ msgstr "Horuru"

#. name for hrt
msgid "Hértevin"
msgstr "Hértevin"
msgstr "Hertevin"

#. name for hru
msgid "Hruso"
@@ -9724,7 +9724,7 @@ msgstr "Hitu"

#. name for htx
msgid "Hittite; Middle"
msgstr "Hittite; Middle"
msgstr "Hittita; mitjà"

#. name for hub
msgid "Huambisa"
@@ -9732,7 +9732,7 @@ msgstr "Huambisa"

#. name for huc
msgid "=/Hua"
msgstr ""
msgstr "Hua"

#. name for hud
msgid "Huaulu"
@@ -9740,7 +9740,7 @@ msgstr "Huaulu"

#. name for hue
msgid "Huave; San Francisco Del Mar"
msgstr "Huave; San Francisco Del Mar"
msgstr "Huave; San Francisco"

#. name for huf
msgid "Humene"
@@ -9756,7 +9756,7 @@ msgstr "Huilliche"

#. name for hui
msgid "Huli"
msgstr "Huli"
msgstr "Hulí"

#. name for huj
msgid "Miao; Northern Guiyang"
@@ -9808,7 +9808,7 @@ msgstr "Huitoto; Murui"

#. name for huv
msgid "Huave; San Mateo Del Mar"
msgstr "Huave; San Mateo Del Mar"
msgstr "Huave; San Mateo"

#. name for huw
msgid "Hukumina"
@@ -9820,35 +9820,35 @@ msgstr "Huitoto; Nüpode"

#. name for huy
msgid "Hulaulá"
msgstr ""
msgstr "Arameu; Hulaula"

#. name for huz
msgid "Hunzib"
msgstr ""
msgstr "Hunzib"

#. name for hvc
msgid "Haitian Vodoun Culture Language"
msgstr ""
msgstr "Haitià Vodoun"

#. name for hve
msgid "Huave; San Dionisio Del Mar"
msgstr ""
msgstr "Huave; San Dionisio"

#. name for hvk
msgid "Haveke"
msgstr ""
msgstr "Haveke"

#. name for hvn
msgid "Sabu"
msgstr ""
msgstr "Sabu"

#. name for hvv
msgid "Huave; Santa María Del Mar"
msgstr ""
msgstr "Huave; Santa Maria"

#. name for hwa
msgid "Wané"
msgstr ""
msgstr "Wané"

#. name for hwc
msgid "Creole English; Hawai'i"
@@ -9856,11 +9856,11 @@ msgstr "Anglès crioll; Hawaii"

#. name for hwo
msgid "Hwana"
msgstr ""
msgstr "Hwana"

#. name for hya
msgid "Hya"
msgstr ""
msgstr "Hya"

#. name for hye
msgid "Armenian"
@@ -9868,79 +9868,79 @@ msgstr "armeni"

#. name for iai
msgid "Iaai"
msgstr ""
msgstr "Iaai"

#. name for ian
msgid "Iatmul"
msgstr ""
msgstr "Iatmulès"

#. name for iap
msgid "Iapama"
msgstr ""
msgstr "Iapama"

#. name for iar
msgid "Purari"
msgstr ""
msgstr "Purari"

#. name for iba
msgid "Iban"
msgstr ""
msgstr "Iban"

#. name for ibb
msgid "Ibibio"
msgstr ""
msgstr "Ibibio"

#. name for ibd
msgid "Iwaidja"
msgstr ""
msgstr "Iwaidja"

#. name for ibe
msgid "Akpes"
msgstr ""
msgstr "Akpes"

#. name for ibg
msgid "Ibanag"
msgstr ""
msgstr "Ibanag"

#. name for ibi
msgid "Ibilo"
msgstr ""
msgstr "Ibilo"

#. name for ibl
msgid "Ibaloi"
msgstr ""
msgstr "Ibaloi"

#. name for ibm
msgid "Agoi"
|
||||
msgstr ""
|
||||
msgstr "Agoi"
|
||||
|
||||
#. name for ibn
|
||||
msgid "Ibino"
|
||||
msgstr ""
|
||||
msgstr "Ibino"
|
||||
|
||||
#. name for ibo
|
||||
msgid "Igbo"
|
||||
msgstr ""
|
||||
msgstr "Ibo"
|
||||
|
||||
#. name for ibr
|
||||
msgid "Ibuoro"
|
||||
msgstr ""
|
||||
msgstr "Ibuoro"
|
||||
|
||||
#. name for ibu
|
||||
msgid "Ibu"
|
||||
msgstr ""
|
||||
msgstr "Ibu"
|
||||
|
||||
#. name for iby
|
||||
msgid "Ibani"
|
||||
msgstr ""
|
||||
msgstr "Ibani"
|
||||
|
||||
#. name for ica
|
||||
msgid "Ede Ica"
|
||||
msgstr ""
|
||||
msgstr "Ede Ica"
|
||||
|
||||
#. name for ich
|
||||
msgid "Etkywan"
|
||||
msgstr ""
|
||||
msgstr "Etkywan"
|
||||
|
||||
#. name for icl
|
||||
msgid "Icelandic Sign Language"
|
||||
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"
|
||||
|
||||
#. name for ida
|
||||
msgid "Idakho-Isukha-Tiriki"
|
||||
msgstr ""
|
||||
msgstr "Idakho-Isukha-Tiriki"
|
||||
|
||||
#. name for idb
|
||||
msgid "Indo-Portuguese"
|
||||
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"
|
||||
|
||||
#. name for idc
|
||||
msgid "Idon"
|
||||
msgstr ""
|
||||
msgstr "Idon"
|
||||
|
||||
#. name for idd
|
||||
msgid "Ede Idaca"
|
||||
msgstr ""
|
||||
msgstr "Ede Idaca"
|
||||
|
||||
#. name for ide
|
||||
msgid "Idere"
|
||||
msgstr ""
|
||||
msgstr "Idere"
|
||||
|
||||
#. name for idi
|
||||
msgid "Idi"
|
||||
@ -9976,43 +9976,43 @@ msgstr ""
|
||||
|
||||
#. name for ido
|
||||
msgid "Ido"
|
||||
msgstr ""
|
||||
msgstr "ido"
|
||||
|
||||
#. name for idr
|
||||
msgid "Indri"
|
||||
msgstr ""
|
||||
msgstr "Indri"
|
||||
|
||||
#. name for ids
|
||||
msgid "Idesa"
|
||||
msgstr ""
|
||||
msgstr "Idesa"
|
||||
|
||||
#. name for idt
|
||||
msgid "Idaté"
|
||||
msgstr ""
|
||||
msgstr "Idaté"
|
||||
|
||||
#. name for idu
|
||||
msgid "Idoma"
|
||||
msgstr ""
|
||||
msgstr "Idoma"
|
||||
|
||||
#. name for ifa
|
||||
msgid "Ifugao; Amganad"
|
||||
msgstr ""
|
||||
msgstr "Ifugao; Amganad"
|
||||
|
||||
#. name for ifb
|
||||
msgid "Ifugao; Batad"
|
||||
msgstr ""
|
||||
msgstr "Ifugao; Batad"
|
||||
|
||||
#. name for ife
|
||||
msgid "Ifè"
|
||||
msgstr ""
|
||||
msgstr "Ifè"
|
||||
|
||||
#. name for iff
|
||||
msgid "Ifo"
|
||||
msgstr ""
|
||||
msgstr "Ifo"
|
||||
|
||||
#. name for ifk
|
||||
msgid "Ifugao; Tuwali"
|
||||
msgstr ""
|
||||
msgstr "Ifugao; Tuwali"
|
||||
|
||||
#. name for ifm
|
||||
msgid "Teke-Fuumu"
|
||||
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"
|
||||
|
||||
#. name for ifu
|
||||
msgid "Ifugao; Mayoyao"
|
||||
msgstr ""
|
||||
msgstr "Ifugao; Mayoyao"
|
||||
|
||||
#. name for ify
|
||||
msgid "Kallahan; Keley-I"
|
||||
msgstr ""
|
||||
msgstr "Kallahan; Keley-I"
|
||||
|
||||
#. name for igb
|
||||
msgid "Ebira"
|
||||
msgstr ""
|
||||
msgstr "Ebira"
|
||||
|
||||
#. name for ige
|
||||
msgid "Igede"
|
||||
1430 setup/iso_639/is.po
File diff suppressed because it is too large
@ -8,14 +8,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-25 12:19+0000\n"
"Last-Translator: Radan Putnik <srastral@gmail.com>\n"
"PO-Revision-Date: 2012-05-03 14:49+0000\n"
"Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
"Language-Team: Serbian <gnu@prevod.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n"
"X-Generator: Launchpad (build 15008)\n"
"X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15195)\n"
"Language: sr\n"

#. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""

#. name for deu
msgid "German"
msgstr "немачки"
msgstr "Немачки"

#. name for dev
msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"

#. name for glg
msgid "Galician"
msgstr ""
msgstr "Галицијски"

#. name for glh
msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""

#. name for gmh
msgid "German; Middle High (ca. 1050-1500)"
msgstr ""
msgstr "Немачки; средње високи (ca. 1050-1500)"

#. name for gml
msgid "German; Middle Low"
msgstr ""
msgstr "Немачки; средње низак"

#. name for gmm
msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""

#. name for gsg
msgid "German Sign Language"
msgstr ""
msgstr "Немачки језик"

#. name for gsl
msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""

#. name for gsw
msgid "German; Swiss"
msgstr ""
msgstr "Немачки ; Швајцарска"

#. name for gta
msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""

#. name for nds
msgid "German; Low"
msgstr ""
msgstr "Немачки; низак"

#. name for ndt
msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""

#. name for nno
msgid "Norwegian Nynorsk"
msgstr "норвешки модерни"
msgstr "Норвешки модерни"

#. name for nnp
msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""

#. name for nob
msgid "Norwegian Bokmål"
msgstr ""
msgstr "Норвешки (књижевни)"

#. name for noc
msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""

#. name for nor
msgid "Norwegian"
msgstr "норвешки"
msgstr "Норвешки"

#. name for nos
msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""

#. name for nsl
msgid "Norwegian Sign Language"
msgstr ""
msgstr "Норвешки језик"

#. name for nsm
msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""

#. name for pdc
msgid "German; Pennsylvania"
msgstr ""
msgstr "Немачки ; Пенсилванија"

#. name for pdi
msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""

#. name for rmg
msgid "Norwegian; Traveller"
msgstr ""
msgstr "Норвешки; путнички"

#. name for rmh
msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""

#. name for sgg
msgid "Swiss-German Sign Language"
msgstr ""
msgstr "Швајцарско-Немачки језик"

#. name for sgh
msgid "Shughni"
@ -10,14 +10,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-22 07:11+0000\n"
"PO-Revision-Date: 2012-05-12 10:25+0000\n"
"Last-Translator: kulkke <Unknown>\n"
"Language-Team: Turkish <gnome-turk@gnome.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-23 04:45+0000\n"
"X-Generator: Launchpad (build 15135)\n"
"X-Launchpad-Export-Date: 2012-05-13 04:43+0000\n"
"X-Generator: Launchpad (build 15225)\n"
"Language: tr\n"

#. name for aaa
@ -406,7 +406,7 @@ msgstr ""

#. name for aed
msgid "Argentine Sign Language"
msgstr ""
msgstr "Arjantin İşaret Dili"

#. name for aee
msgid "Pashayi; Northeast"
@ -1554,7 +1554,7 @@ msgstr "Dano"

#. name for asp
msgid "Algerian Sign Language"
msgstr ""
msgstr "Cezayir İşaret Dili"

#. name for asq
msgid "Austrian Sign Language"
@ -2578,7 +2578,7 @@ msgstr "Blafe"

#. name for bfi
msgid "British Sign Language"
msgstr ""
msgstr "Britanya İşaret Dili"

#. name for bfj
msgid "Bafanji"
@ -4167,7 +4167,7 @@ msgstr "Bukat"

#. name for bvl
msgid "Bolivian Sign Language"
msgstr ""
msgstr "Bolivya İşaret Dili"

#. name for bvm
msgid "Bamunka"
@ -4587,7 +4587,7 @@ msgstr "Biri"

#. name for bzs
msgid "Brazilian Sign Language"
msgstr ""
msgstr "Brezilya İşaret Dili"

#. name for bzt
msgid "Brithenig"
@ -5623,11 +5623,11 @@ msgstr ""

#. name for csf
msgid "Cuba Sign Language"
msgstr ""
msgstr "Küba İşaret Dili"

#. name for csg
msgid "Chilean Sign Language"
msgstr ""
msgstr "Şili İşaret Dili"

#. name for csh
msgid "Chin; Asho"
@ -5651,7 +5651,7 @@ msgstr ""

#. name for csn
msgid "Colombian Sign Language"
msgstr ""
msgstr "Kolombiya İşaret Dili"

#. name for cso
msgid "Chinantec; Sochiapan"
@ -5663,7 +5663,7 @@ msgstr ""

#. name for csr
msgid "Costa Rican Sign Language"
msgstr ""
msgstr "Kosta Rika İşaret Dili"

#. name for css
msgid "Ohlone; Southern"
@ -7347,7 +7347,7 @@ msgstr ""

#. name for esl
msgid "Egypt Sign Language"
msgstr ""
msgstr "Mısır İşaret Dili"

#. name for esm
msgid "Esuma"
@ -7551,7 +7551,7 @@ msgstr ""

#. name for fcs
msgid "Quebec Sign Language"
msgstr ""
msgstr "Quebec İşaret Dili"

#. name for fer
msgid "Feroge"
@ -8806,7 +8806,7 @@ msgstr ""

#. name for gsm
msgid "Guatemalan Sign Language"
msgstr ""
msgstr "Guatemala İşaret Dili"

#. name for gsn
msgid "Gusan"
@ -10895,7 +10895,7 @@ msgstr ""

#. name for jos
msgid "Jordanian Sign Language"
msgstr ""
msgstr "Ürdün İşaret Dili"

#. name for jow
msgid "Jowulu"
@ -13847,7 +13847,7 @@ msgstr ""

#. name for lbs
msgid "Libyan Sign Language"
msgstr ""
msgstr "Libya İşaret Dili"

#. name for lbt
msgid "Lachi"
@ -15591,7 +15591,7 @@ msgstr ""

#. name for mfs
msgid "Mexican Sign Language"
msgstr ""
msgstr "Meksika İşaret Dili"

#. name for mft
msgid "Mokerang"
@ -17055,7 +17055,7 @@ msgstr ""

#. name for mul
msgid "Multiple languages"
msgstr ""
msgstr "Çoklu diller"

#. name for mum
msgid "Maiwala"
@ -17867,7 +17867,7 @@ msgstr ""

#. name for ncs
msgid "Nicaraguan Sign Language"
msgstr ""
msgstr "Nikaragua İşaret Dili"

#. name for nct
msgid "Naga; Chothe"
@ -19495,7 +19495,7 @@ msgstr ""

#. name for nzs
msgid "New Zealand Sign Language"
msgstr ""
msgstr "Yeni Zelanda İşaret Dili"

#. name for nzu
msgid "Teke-Nzikou"
@ -21219,7 +21219,7 @@ msgstr ""

#. name for prl
msgid "Peruvian Sign Language"
msgstr ""
msgstr "Peru İşaret Dili"

#. name for prm
msgid "Kibiri"
@ -22699,7 +22699,7 @@ msgstr ""

#. name for sdl
msgid "Saudi Arabian Sign Language"
msgstr ""
msgstr "Suudi Arabistan İşaret Dili"

#. name for sdm
msgid "Semandang"
@ -22847,7 +22847,7 @@ msgstr ""

#. name for sfs
msgid "South African Sign Language"
msgstr ""
msgstr "Güney Afrika İşaret Dili"

#. name for sfw
msgid "Sehwi"
@ -25943,7 +25943,7 @@ msgstr ""

#. name for tse
msgid "Tunisian Sign Language"
msgstr ""
msgstr "Tunus İşaret Dili"

#. name for tsf
msgid "Tamang; Southwestern"
@ -27348,7 +27348,7 @@ msgstr ""

#. name for vsl
msgid "Venezuelan Sign Language"
msgstr ""
msgstr "Venezuela İşaret Dili"

#. name for vsv
msgid "Valencian Sign Language"
@ -28760,7 +28760,7 @@ msgstr ""

#. name for xms
msgid "Moroccan Sign Language"
msgstr ""
msgstr "Fas İşaret Dili"

#. name for xmt
msgid "Matbat"
@ -29540,7 +29540,7 @@ msgstr ""

#. name for yid
msgid "Yiddish"
msgstr "Yiddiş"
msgstr "Yidiş"

#. name for yif
msgid "Ache"
@ -26,7 +26,7 @@ def get_opts_from_parser(parser):

class Coffee(Command): # {{{

    description = 'Compile coffeescript files into javascript'
    COFFEE_DIRS = {'ebooks/oeb/display': 'display'}
    COFFEE_DIRS = ('ebooks/oeb/display',)

    def add_options(self, parser):
        parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,49 +47,69 @@ class Coffee(Command): # {{{
            except KeyboardInterrupt:
                pass

    def show_js(self, jsfile):
    def show_js(self, raw):
        from pygments.lexers import JavascriptLexer
        from pygments.formatters import TerminalFormatter
        from pygments import highlight
        with open(jsfile, 'rb') as f:
            raw = f.read()
        print highlight(raw, JavascriptLexer(), TerminalFormatter())

    def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
        for toplevel, dest in self.COFFEE_DIRS.iteritems():
            dest = self.j(self.RESOURCES, dest)
            for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
                js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js'))
                if self.newer(js, x):
                    print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
                        timestamp else '', os.path.basename(x)))
                    try:
                        cs = subprocess.check_output(self.compiler +
                                [x]).decode('utf-8')
                    except Exception as e:
                        print ('\n\tCompilation of %s failed'%os.path.basename(x))
                        print (e)
                        if ignore_errors:
                            with open(js, 'wb') as f:
                                f.write('# Compilation from coffeescript failed')
                        else:
                            raise SystemExit(1)
                    else:
                        with open(js, 'wb') as f:
                            f.write(cs.encode('utf-8'))
                        if opts.show_js:
                            self.show_js(js)
                            print ('#'*80)
                            print ('#'*80)
        src_files = {}
        for src in self.COFFEE_DIRS:
            for f in glob.glob(self.j(self.SRC, __appname__, src,
                '*.coffee')):
                bn = os.path.basename(f).rpartition('.')[0]
                arcname = src.replace('/', '.') + '.' + bn + '.js'
                src_files[arcname] = (f, os.stat(f).st_mtime)

        existing = {}
        dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
        if os.path.exists(dest):
            with zipfile.ZipFile(dest, 'r') as zf:
                for info in zf.infolist():
                    mtime = time.mktime(info.date_time + (0, 0, -1))
                    arcname = info.filename
                    if (arcname in src_files and src_files[arcname][1] <
                            mtime):
                        existing[arcname] = (zf.read(info), info)

        todo = set(src_files) - set(existing)
        updated = {}
        for arcname in todo:
            name = arcname.rpartition('.')[0]
            print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
                timestamp else '', name))
            src = src_files[arcname][0]
            try:
                js = subprocess.check_output(self.compiler +
                        [src]).decode('utf-8')
            except Exception as e:
                print ('\n\tCompilation of %s failed'%name)
                print (e)
                if ignore_errors:
                    js = u'# Compilation from coffeescript failed'
                else:
                    raise SystemExit(1)
            else:
                if opts.show_js:
                    self.show_js(js)
                    print ('#'*80)
                    print ('#'*80)
            zi = zipfile.ZipInfo()
            zi.filename = arcname
            zi.date_time = time.localtime()[:6]
            updated[arcname] = (js.encode('utf-8'), zi)
        if updated:
            with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
                for raw, zi in updated.itervalues():
                    zf.writestr(zi, raw)
                for raw, zi in existing.itervalues():
                    zf.writestr(zi, raw)

    def clean(self):
        for toplevel, dest in self.COFFEE_DIRS.iteritems():
            dest = self.j(self.RESOURCES, dest)
            for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
                x = x.rpartition('.')[0] + '.js'
                x = self.j(dest, os.path.basename(x))
                if os.path.exists(x):
                    os.remove(x)
        x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
        if os.path.exists(x):
            os.remove(x)
# }}}

class Kakasi(Command): # {{{
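The incremental rebuild above decides which coffeescript sources are stale by
comparing each source file's st_mtime with the timestamp stored on its entry
in compiled_coffeescript.zip. A minimal standalone sketch of that comparison
(function and file names here are illustrative, not part of the commit):

    import os, time, zipfile

    def stale_sources(zip_path, sources):
        # sources maps archive names to source file paths (hypothetical data)
        stale = set(sources)
        if os.path.exists(zip_path):
            with zipfile.ZipFile(zip_path, 'r') as zf:
                for info in zf.infolist():
                    # ZipInfo.date_time is a 6-tuple; pad it to the 9-tuple
                    # time.mktime() expects (DST flag -1 means unknown)
                    entry_mtime = time.mktime(info.date_time + (0, 0, -1))
                    src = sources.get(info.filename)
                    if src is not None and os.stat(src).st_mtime < entry_mtime:
                        stale.discard(info.filename)  # archived copy is newer
        return stale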
@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, os, re, time, random, __builtin__, warnings
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
__builtin__.__dict__['dynamic_property'] = lambda func: func(None)
from math import floor
from functools import partial
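The change above drops the old lambda(func) tuple-parameter syntax, which
Python 3 rejects, for a plain one-argument lambda; behaviour is unchanged.
dynamic_property calls the decorated function once with None and binds
whatever it returns, so the function body is only a factory for a property.
A minimal sketch of the idiom (class and attribute names are illustrative):

    dynamic_property = lambda func: func(None)

    class Example(object):
        @dynamic_property
        def value(self):
            # called once with self=None; just build and return the property
            def fget(self):
                return getattr(self, '_value', None)
            def fset(self, v):
                self._value = v
            return property(fget=fget, fset=fset)

    e = Example()
    e.value = 42
    print (e.value)  # 42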
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 49)
numeric_version = (0, 8, 52)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
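For reference, the human-readable version string is derived directly from the
tuple, so with the new value:

    numeric_version = (0, 8, 52)
    __version__ = u'.'.join(map(unicode, numeric_version))  # u'0.8.52'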
@ -421,6 +421,16 @@ class EPUBMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.epub import set_metadata
        set_metadata(stream, mi, apply_null=self.apply_null)

class FB2MetadataWriter(MetadataWriterPlugin):

    name = 'Set FB2 metadata'
    file_types = set(['fb2'])
    description = _('Set metadata in %s files')%'FB2'

    def set_metadata(self, stream, mi, type):
        from calibre.ebooks.metadata.fb2 import set_metadata
        set_metadata(stream, mi, apply_null=self.apply_null)

class HTMLZMetadataWriter(MetadataWriterPlugin):

    name = 'Set HTMLZ metadata'
@ -1321,15 +1331,15 @@ class StoreEbookscomStore(StoreBase):
    formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
    affiliate = True

class StoreEBookShoppeUKStore(StoreBase):
    name = 'ebookShoppe UK'
    author = u'Charles Haley'
    description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
    actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore'

    headquarters = 'UK'
    formats = ['EPUB', 'PDF']
    affiliate = True
# class StoreEBookShoppeUKStore(StoreBase):
#     name = 'ebookShoppe UK'
#     author = u'Charles Haley'
#     description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
#     actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore'
#
#     headquarters = 'UK'
#     formats = ['EPUB', 'PDF']
#     affiliate = True

class StoreEHarlequinStore(StoreBase):
    name = 'eHarlequin'
@ -1613,7 +1623,6 @@ plugins += [
    StoreEbookNLStore,
    StoreEbookpointStore,
    StoreEbookscomStore,
    StoreEBookShoppeUKStore,
    StoreEHarlequinStore,
    StoreEKnigiStore,
    StoreEscapeMagazineStore,
@ -295,3 +295,17 @@ class OutputFormatPlugin(Plugin):
        return self.oeb.metadata.publication_type and \
            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')

    def specialize_css_for_output(self, log, opts, item, stylizer):
        '''
        Can be used to make changes to the css during the CSS flattening
        process.

        :param item: The item (HTML file) being processed
        :param stylizer: A Stylizer object containing the flattened styles for
                         item. You can get the style for any element by
                         stylizer.style(element).
        '''
        pass
@ -59,9 +59,7 @@ Run an embedded python interpreter.
        'files and metadata, which you can edit using standard HTML '
        'editing tools, and then rebuilds the file from the edited HTML. '
        'Makes no additional changes to the HTML, unlike a full calibre '
        'conversion). Note that this tool will try to open the '
        'folder containing the HTML files in the editor pointed to by the'
        ' EDITOR environment variable.')
        'conversion).')

    parser.add_option('--test-build', help='Test binary modules in build',
        action='store_true', default=False)
@ -184,6 +182,12 @@ def main(args=sys.argv):
        from calibre.constants import debug
        debug()
    if len(args) > 2 and args[1] in ('-e', '--exec-file'):

        # Load all user defined plugins so the script can import from the
        # calibre_plugins namespace
        import calibre.customize.ui as dummy
        dummy

        sys.argv = [args[2]] + args[3:]
        ef = os.path.abspath(args[2])
        base = os.path.dirname(ef)
@ -222,7 +226,7 @@ def main(args=sys.argv):
        from calibre.utils.pyconsole.main import main
        main()
    elif opts.command:
        sys.argv = args[:1]
        sys.argv = args
        exec opts.command
    elif opts.debug_device_driver:
        debug_device_driver()
@ -57,6 +57,7 @@ class ANDROID(USBMS):
                0x4316 : [0x216],
                0x42d6 : [0x216],
                0x42d7 : [0x216],
                0x42f7 : [0x216],
        },
        # Freescale
        0x15a2 : {
@ -177,7 +178,7 @@ class ANDROID(USBMS):
            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
            'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
            'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
            'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C']
            'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -193,7 +194,7 @@ class ANDROID(USBMS):
            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
            'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
            'GT-S5830L_CARD']
            'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,8 @@ class ANDROID(USBMS):
            'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
            'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
            'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER']
            'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
            'UMS_COMPOSITE']

    OSX_MAIN_MEM = 'Android Device Main Memory'
@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
    name = 'PocketBook 360 Device Interface'

    gui_name = 'PocketBook 360'
    VENDOR_ID = [0x1f85, 0x525]
    PRODUCT_ID = [0x1688, 0xa4a5]
    BCD = [0x110]

    FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']
@ -13,6 +13,7 @@ import datetime, os, re, sys, json, hashlib
from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS
from calibre import strftime
from calibre.utils.logging import default_log

'''
Notes on collections:
@ -324,6 +325,7 @@ class KINDLE2(KINDLE):
    OPT_APNX = 0
    OPT_APNX_ACCURATE = 1
    OPT_APNX_CUST_COL = 2
    THUMBNAIL_HEIGHT = 180

    def formats_to_scan_for(self):
        ans = USBMS.formats_to_scan_for(self) | {'azw3'}
@ -375,8 +377,36 @@ class KINDLE2(KINDLE):

    def upload_cover(self, path, filename, metadata, filepath):
        '''
        Hijacking this function to write the apnx file.
        Upload sidecar files: cover thumbnails and page count
        '''
        # Upload the cover thumbnail
        try:
            self.upload_kindle_thumbnail(metadata, filepath)
        except:
            import traceback
            traceback.print_exc()
        # Upload the apnx file
        self.upload_apnx(filename, metadata, filepath)

    def upload_kindle_thumbnail(self, metadata, filepath):
        coverdata = getattr(metadata, 'thumbnail', None)
        if not coverdata or not coverdata[2]:
            return
        thumb_dir = os.path.join(self._main_prefix, 'system', 'thumbnails')
        if not os.path.exists(thumb_dir): return

        from calibre.ebooks.mobi.reader.headers import MetadataHeader
        with lopen(filepath, 'rb') as f:
            mh = MetadataHeader(f, default_log)
            if mh.exth is None or not mh.exth.uuid or not mh.exth.cdetype:
                return
            thumbfile = os.path.join(thumb_dir,
                    'thumbnail_{uuid}_{cdetype}_portrait.jpg'.format(
                        uuid=mh.exth.uuid, cdetype=mh.exth.cdetype))
            with open(thumbfile, 'wb') as f:
                f.write(coverdata[2])

    def upload_apnx(self, filename, metadata, filepath):
        from calibre.devices.kindle.apnx import APNXBuilder

        opts = self.settings()
@ -422,6 +452,9 @@ class KINDLE_DX(KINDLE2):
    PRODUCT_ID = [0x0003]
    BCD = [0x0100]

    def upload_kindle_thumbnail(self, metadata, filepath):
        pass

class KINDLE_FIRE(KINDLE2):

    name = 'Kindle Fire Device Interface'
@ -440,4 +473,6 @@ class KINDLE_FIRE(KINDLE2):
    VENDOR_NAME = 'AMAZON'
    WINDOWS_MAIN_MEM = 'KINDLE'

    def upload_kindle_thumbnail(self, metadata, filepath):
        pass
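The thumbnail sidecar goes into the device's system/thumbnails folder under a
name derived from the book's EXTH uuid and cdetype fields. A small sketch of
the naming scheme (the mount point and identifiers below are made up):

    import os

    def kindle_thumbnail_path(main_prefix, uuid, cdetype):
        # mirrors the naming used by upload_kindle_thumbnail() above
        return os.path.join(main_prefix, 'system', 'thumbnails',
                'thumbnail_{uuid}_{cdetype}_portrait.jpg'.format(
                    uuid=uuid, cdetype=cdetype))

    print (kindle_thumbnail_path('/media/Kindle', 'B0000000000000XX', 'EBOK'))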
@ -57,10 +57,11 @@ class PICO(NEWSMY):
    gui_name = 'Pico'
    description = _('Communicate with the Pico reader.')

    VENDOR_NAME = ['TECLAST', 'IMAGIN', 'LASER-']
    VENDOR_NAME = ['TECLAST', 'IMAGIN', 'LASER-', '']
    WINDOWS_MAIN_MEM = ['USBDISK__USER', 'EB720']
    EBOOK_DIR_MAIN = 'Books'
    FORMATS = ['EPUB', 'FB2', 'TXT', 'LRC', 'PDB', 'PDF', 'HTML', 'WTXT']
    SCAN_FROM_ROOT = True

class IPAPYRUS(TECLAST_K3):
@ -155,7 +155,11 @@ class CHMReader(CHMFile):
                self.hhc_path = f
                break
        if self.hhc_path not in files and files:
            self.hhc_path = files[0]
        for f in files:
            if f.partition('.')[-1].lower() in {'html', 'htm', 'xhtm',
                    'xhtml'}:
                self.hhc_path = f
                break

        if self.hhc_path == '.hhc' and self.hhc_path not in files:
            from calibre import walk
@ -165,6 +169,9 @@ class CHMReader(CHMFile):
                    self.hhc_path = os.path.relpath(x, output_dir)
                    break

        if self.hhc_path not in files and files:
            self.hhc_path = files[0]

    def _reformat(self, data, htmlpath):
        if self.input_encoding:
            data = data.decode(self.input_encoding)
@ -241,7 +248,10 @@ class CHMReader(CHMFile):
            except:
                pass
        # do not prettify, it would reformat the <pre> tags!
        return str(soup)
        try:
            return str(soup)
        except RuntimeError:
            return data

    def Contents(self):
        if self._contents is not None:
@ -1,4 +1,25 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


class ConversionUserFeedBack(Exception):

    def __init__(self, title, msg, level='info', det_msg=''):
        ''' Show a simple message to the user

        :param title: The title (very short description)
        :param msg: The message to show the user
        :param level: Must be one of 'info', 'warn' or 'error'
        :param det_msg: Optional detailed message to show the user
        '''
        import json
        Exception.__init__(self, json.dumps({'msg':msg, 'level':level,
            'det_msg':det_msg, 'title':title}))
        self.title, self.msg, self.det_msg = title, msg, det_msg
        self.level = level
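A pipeline component can abort a conversion with a user-visible message by
raising this exception; the ebook-convert driver (see the cli.py hunk below)
catches it and logs the title, message and optional details. A hypothetical
example of raising it:

    raise ConversionUserFeedBack(
        'Invalid EPUB', 'The EPUB is missing its OPF package file.',
        level='error', det_msg='container.xml listed no rootfile')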
@ -15,6 +15,7 @@ from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre import patheq
from calibre.ebooks.conversion import ConversionUserFeedBack

USAGE = '%prog ' + _('''\
input_file output_file [options]
@ -304,7 +305,10 @@ def read_sr_patterns(path, log=None):
def main(args=sys.argv):
    log = Log()
    parser, plumber = create_option_parser(args, log)
    opts = parser.parse_args(args)[0]
    opts, leftover_args = parser.parse_args(args)
    if len(leftover_args) > 3:
        log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
        return 1
    for x in ('read_metadata_from_opf', 'cover'):
        if getattr(opts, x, None) is not None:
            setattr(opts, x, abspath(getattr(opts, x)))
@ -317,7 +321,16 @@ def main(args=sys.argv):
            if n.dest]
    plumber.merge_ui_recommendations(recommendations)

    plumber.run()
    try:
        plumber.run()
    except ConversionUserFeedBack as e:
        ll = {'info': log.info, 'warn': log.warn,
                'error':log.error}.get(e.level, log.info)
        ll(e.title)
        if e.det_msg:
            log.debug(e.det_msg)
        ll(e.msg)
        raise SystemExit(1)

    log(_('Output saved to'), ' ', plumber.output)
@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
        return False

    def rationalize_cover(self, opf, log):
        removed = None
        from lxml import etree
        guide_cover, guide_elem = None, None
        for guide_elem in opf.iterguide():
@ -91,6 +92,7 @@ class EPUBInput(InputFormatPlugin):
        # specially
        if not self.for_viewer:
            spine[0].getparent().remove(spine[0])
            removed = guide_cover
        guide_elem.set('href', 'calibre_raster_cover.jpg')
        from calibre.ebooks.oeb.base import OPF
        t = etree.SubElement(elem[0].getparent(), OPF('item'),
@ -109,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
        if renderer is not None:
            open('calibre_raster_cover.jpg', 'wb').write(
                renderer)
        return removed

    def find_opf(self):
        from lxml import etree
@ -170,7 +173,7 @@ class EPUBInput(InputFormatPlugin):
            for elem in opf.iterguide():
                elem.set('href', delta+elem.get('href'))

            self.rationalize_cover(opf, log)
            self.removed_cover = self.rationalize_cover(opf, log)

            self.optimize_opf_parsing = opf
            for x in opf.itermanifest():
@ -198,3 +201,17 @@ class EPUBInput(InputFormatPlugin):
                nopf.write(opf.render())

            return os.path.abspath(u'content.opf')

    def postprocess_book(self, oeb, opts, log):
        rc = getattr(self, 'removed_cover', None)
        if rc:
            cover_toc_item = None
            for item in oeb.toc.iterdescendants():
                if item.href and item.href.partition('#')[0] == rc:
                    cover_toc_item = item
                    break
            spine = {x.href for x in oeb.spine}
            if (cover_toc_item is not None and cover_toc_item not in spine):
                oeb.toc.item_that_refers_to_cover = cover_toc_item
@ -312,13 +312,9 @@ class EPUBOutput(OutputFormatPlugin):
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
        from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote
        from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote

        stylesheet = None
        for item in self.oeb.manifest:
            if item.media_type.lower() in OEB_STYLES:
                stylesheet = item
                break
        stylesheet = self.oeb.manifest.main_stylesheet

        # ADE cries big wet tears when it encounters an invalid fragment
        # identifier in the NCX toc.
@ -397,8 +393,14 @@ class EPUBOutput(OutputFormatPlugin):
        for tag in XPath('//h:body/descendant::h:script')(root):
            tag.getparent().remove(tag)

        formchildren = XPath('./h:input|./h:button|./h:textarea|'
                './h:label|./h:fieldset|./h:legend')
        for tag in XPath('//h:form')(root):
            tag.getparent().remove(tag)
            if formchildren(tag):
                tag.getparent().remove(tag)
            else:
                # Not a real form
                tag.tag = XHTML('div')

        for tag in XPath('//h:center')(root):
            tag.tag = XHTML('div')
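The new check only strips <form> elements that contain real form controls; an
empty wrapper form is demoted to a <div> instead. The namespace-aware XPath
can be exercised on its own, e.g. with this minimal lxml sketch:

    from lxml import etree

    XHTML_NS = 'http://www.w3.org/1999/xhtml'
    formchildren = etree.XPath(
        './h:input|./h:button|./h:textarea|./h:label|./h:fieldset|./h:legend',
        namespaces={'h': XHTML_NS})

    snippet = '<form xmlns="%s"><div>just layout, no controls</div></form>' % XHTML_NS
    form = etree.fromstring(snippet)
    print (bool(formchildren(form)))  # False: not a real form, becomes a <div>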
@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
    name = 'MOBI Input'
    author = 'Kovid Goyal'
    description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
    file_types = set(['mobi', 'prc', 'azw', 'azw3'])
    file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
@ -232,6 +232,10 @@ class MOBIOutput(OutputFormatPlugin):
            writer(oeb, output_path)
            extract_mobi(output_path, opts)

    def specialize_css_for_output(self, log, opts, item, stylizer):
        from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup
        CSSCleanup(log, opts)(item, stylizer)

class AZW3Output(OutputFormatPlugin):

    name = 'AZW3 Output'
@ -254,9 +258,6 @@ class AZW3Output(OutputFormatPlugin):
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Disable compression of the file contents.')
        ),
        OptionRecommendation(name='personal_doc', recommended_value='[PDOC]',
            help=_('Tag marking book to be filed with Personal Docs')
        ),
        OptionRecommendation(name='mobi_toc_at_start',
            recommended_value=False,
            help=_('When adding the Table of Contents to the book, add it at the start of the '
@ -298,4 +299,8 @@ class AZW3Output(OutputFormatPlugin):
            kf8.write(output_path)
            extract_mobi(output_path, opts)

    def specialize_css_for_output(self, log, opts, item, stylizer):
        from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup
        CSSCleanup(log, opts)(item, stylizer)
@ -99,12 +99,8 @@ class PDFOutput(OutputFormatPlugin):

        # Remove page-break-before on <body> element as it causes
        # blank pages in PDF Output
        from calibre.ebooks.oeb.base import OEB_STYLES, XPath
        stylesheet = None
        for item in self.oeb.manifest:
            if item.media_type.lower() in OEB_STYLES:
                stylesheet = item
                break
        from calibre.ebooks.oeb.base import XPath
        stylesheet = self.oeb.manifest.main_stylesheet
        if stylesheet is not None:
            from cssutils.css import CSSRule
            classes = set(['.calibre'])
@ -4,6 +4,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, re, sys, shutil, pprint
from functools import partial

from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.customize.ui import input_profiles, output_profiles, \
@ -342,21 +343,25 @@ OptionRecommendation(name='remove_fake_margins',
OptionRecommendation(name='margin_top',
    recommended_value=5.0, level=OptionRecommendation.LOW,
    help=_('Set the top margin in pts. Default is %default. '
        'Setting this to less than zero will cause no margin to be set. '
        'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='margin_bottom',
    recommended_value=5.0, level=OptionRecommendation.LOW,
    help=_('Set the bottom margin in pts. Default is %default. '
        'Setting this to less than zero will cause no margin to be set. '
        'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='margin_left',
    recommended_value=5.0, level=OptionRecommendation.LOW,
    help=_('Set the left margin in pts. Default is %default. '
        'Setting this to less than zero will cause no margin to be set. '
        'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='margin_right',
    recommended_value=5.0, level=OptionRecommendation.LOW,
    help=_('Set the right margin in pts. Default is %default. '
        'Setting this to less than zero will cause no margin to be set. '
        'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='change_justification',
@ -884,7 +889,10 @@ OptionRecommendation(name='search_replace',
        self.log.debug('Resolved conversion options')
        try:
            self.log.debug('calibre version:', __version__)
            self.log.debug(pprint.pformat(self.opts.__dict__))
            odict = dict(self.opts.__dict__)
            for x in ('username', 'password'):
                odict.pop(x, None)
            self.log.debug(pprint.pformat(odict))
        except:
            self.log.exception('Failed to get resolved conversion options')

@ -1010,6 +1018,13 @@ OptionRecommendation(name='search_replace',
        pr(0.35)
        self.flush()

        if self.output_plugin.file_type != 'epub':
            # Remove the toc reference to the html cover, if any, except for
            # epub, as the epub output plugin will do the right thing with it.
            item = getattr(self.oeb.toc, 'item_that_refers_to_cover', None)
            if item is not None and item.count() == 0:
                self.oeb.toc.remove(item)

        from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
        fbase = self.opts.base_font_size
        if fbase < 1e-4:
@ -1061,7 +1076,9 @@ OptionRecommendation(name='search_replace',
            untable=self.output_plugin.file_type in ('mobi','lit'),
            unfloat=self.output_plugin.file_type in ('mobi', 'lit'),
            page_break_on_body=self.output_plugin.file_type in ('mobi',
                'lit'))
                'lit'),
            specializer=partial(self.output_plugin.specialize_css_for_output,
                self.log, self.opts))
        flattener(self.oeb, self.opts)

        self.opts.insert_blank_line = oibl
@ -148,7 +148,7 @@ class HeuristicProcessor(object):
        return wordcount.words

    def markup_italicis(self, html):
        self.log.debug("\n\n\nitalicize debugging \n\n\n")
        #self.log.debug("\n\n\nitalicize debugging \n\n\n")
        ITALICIZE_WORDS = [
            'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
            'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.',
@ -184,6 +184,9 @@ class HeuristicProcessor(object):
            except OverflowError:
                # match.group(0) was too large to be compiled into a regex
                continue
            except re.error:
                # the match was not a valid regular expression
                continue

        return html
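The new except clause guards against matched text that does not form a valid
regular expression; compiling such a pattern raises re.error, which is now
skipped just like the existing OverflowError case. For example:

    import re

    try:
        re.compile('(unbalanced')  # invalid pattern
    except re.error:
        pass  # the heuristics simply skip this candidate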
@ -113,6 +113,11 @@ class HTMLFile(object):
                raise IOError(msg)
            raise IgnoreFile(msg, err.errno)

        if not src:
            if level == 0:
                raise ValueError('The file %s is empty'%self.path)
            self.is_binary = True

        if not self.is_binary:
            if not encoding:
                encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1]
@ -5,11 +5,15 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>, '\
                '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
'''Read meta information from fb2 files'''

import os
import datetime
import os, random, datetime
from functools import partial
from string import ascii_letters, digits
from base64 import b64encode

from lxml import etree

from calibre.utils.date import parse_date
from calibre.utils.magick.draw import save_cover_data_to
from calibre import guess_type, guess_all_extensions, prints, force_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn
from calibre.ebooks.chardet import xml_to_unicode
@ -22,6 +26,12 @@ NAMESPACES = {
XPath = partial(etree.XPath, namespaces=NAMESPACES)
tostring = partial(etree.tostring, method='text', encoding=unicode)

def FB2(tag):
    return '{%s}%s'%(NAMESPACES['fb2'], tag)

def XLINK(tag):
    return '{%s}%s'%(NAMESPACES['xlink'], tag)

def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''

@ -85,6 +95,7 @@ def _parse_authors(root):
    authors = []
    # pick up authors, but only from one section, <title-info>; otherwise it is not consistent!
    # Those are fallbacks: <src-title-info>, <document-info>
    author = None
    for author_sec in ['title-info', 'src-title-info', 'document-info']:
        for au in XPath('//fb2:%s/fb2:author'%author_sec)(root):
            author = _parse_author(au)
@ -211,8 +222,8 @@ def _parse_publisher(root, mi):
def _parse_pubdate(root, mi):
    year = XPath('number(//fb2:publish-info/fb2:year/text())')(root)
    if float.is_integer(year):
        # only year is available, so use 1-st of Jan
        mi.pubdate = datetime.date(int(year), 1, 1)
        # only year is available, so use 2nd of June
        mi.pubdate = datetime.date(int(year), 6, 2)

def _parse_timestamp(root, mi):
    #<date value="1996-12-03">03.12.1996</date>
@ -239,3 +250,135 @@ def _get_fbroot(stream):
    raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
    root = etree.fromstring(raw, parser=parser)
    return root

def _clear_meta_tags(doc, tag):
    for parent in ('title-info', 'src-title-info', 'publish-info'):
        for x in XPath('//fb2:%s/fb2:%s'%(parent, tag))(doc):
            x.getparent().remove(x)

def _set_title(title_info, mi):
    if not mi.is_null('title'):
        _clear_meta_tags(title_info, 'book-title')
        title = _get_or_create(title_info, 'book-title')
        title.text = mi.title

def _text2fb2(parent, text):
    lines = text.split('\n')
    for line in lines:
        line = line.strip()
        if line:
            p = _create_tag(parent, 'p', at_start=False)
            p.text = line
        else:
            _create_tag(parent, 'empty-line', at_start=False)

def _set_comments(title_info, mi):
    if not mi.is_null('comments'):
        from calibre.utils.html2text import html2text
        _clear_meta_tags(title_info, 'annotation')
        title = _get_or_create(title_info, 'annotation')
        _text2fb2(title, html2text(mi.comments))


def _set_authors(title_info, mi):
    if not mi.is_null('authors'):
        _clear_meta_tags(title_info, 'author')
        for author in mi.authors:
            author_parts = author.split()
            if not author_parts: continue
            atag = _create_tag(title_info, 'author')
            if len(author_parts) == 1:
                _create_tag(atag, 'nickname').text = author
            else:
                _create_tag(atag, 'first-name').text = author_parts[0]
                author_parts = author_parts[1:]
                if len(author_parts) > 1:
                    _create_tag(atag, 'middle-name', at_start=False).text = author_parts[0]
                    author_parts = author_parts[1:]
                if author_parts:
                    _create_tag(atag, 'last-name', at_start=False).text = ' '.join(author_parts)

def _set_tags(title_info, mi):
    if not mi.is_null('tags'):
        _clear_meta_tags(title_info, 'genre')
        for t in mi.tags:
            tag = _create_tag(title_info, 'genre')
            tag.text = t

def _set_series(title_info, mi):
    if not mi.is_null('series'):
        _clear_meta_tags(title_info, 'sequence')
        seq = _get_or_create(title_info, 'sequence')
        seq.set('name', mi.series)
        try:
            seq.set('number', '%g'%mi.series_index)
        except:
            seq.set('number', '1')

def _rnd_name(size=8, chars=ascii_letters + digits):
    return ''.join(random.choice(chars) for x in range(size))

def _rnd_pic_file_name(prefix='calibre_cover_', size=32, ext='jpg'):
    return prefix + _rnd_name(size=size) + '.' + ext

def _encode_into_jpeg(data):
    data = save_cover_data_to(data, 'cover.jpg', return_data=True)
    return b64encode(data)

def _set_cover(title_info, mi):
    if not mi.is_null('cover_data') and mi.cover_data[1]:
        coverpage = _get_or_create(title_info, 'coverpage')
        cim_tag = _get_or_create(coverpage, 'image')
        if cim_tag.attrib.has_key(XLINK('href')):
            cim_filename = cim_tag.attrib[XLINK('href')][1:]
        else:
            cim_filename = _rnd_pic_file_name('cover')
            cim_tag.attrib[XLINK('href')] = '#' + cim_filename
        fb2_root = cim_tag.getroottree().getroot()
        cim_binary = _get_or_create(fb2_root, 'binary', attribs={'id': cim_filename}, at_start=False)
        cim_binary.attrib['content-type'] = 'image/jpeg'
        cim_binary.text = _encode_into_jpeg(mi.cover_data[1])

def _create_tag(parent, tag, attribs={}, at_start=True):
    ans = parent.makeelement(FB2(tag))
    ans.attrib.update(attribs)
    if at_start:
        parent.insert(0, ans)
    else:
        parent.append(ans)
    return ans

def _get_or_create(parent, tag, attribs={}, at_start=True):
    xpathstr = './fb2:' + tag
    for n, v in attribs.items():
        xpathstr += '[@%s="%s"]' % (n, v)
    ans = XPath(xpathstr)(parent)
    if ans:
        ans = ans[0]
    else:
        ans = _create_tag(parent, tag, attribs, at_start)
    return ans

def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
    stream.seek(0)
    root = _get_fbroot(stream)
    desc = _get_or_create(root, 'description')
    ti = _get_or_create(desc, 'title-info')

    indent = ti.text

    _set_comments(ti, mi)
    _set_series(ti, mi)
    _set_tags(ti, mi)
    _set_authors(ti, mi)
    _set_title(ti, mi)
    _set_cover(ti, mi)

    for child in ti:
        child.tail = indent

    stream.seek(0)
    stream.truncate()
    stream.write(etree.tostring(root, method='xml', encoding='utf-8',
        xml_declaration=True))
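In rough outline, the new FB2 writer is driven by opening the file read-write,
building a Metadata object and handing both to set_metadata(). A hedged usage
sketch (the file name and field values are hypothetical):

    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.fb2 import set_metadata

    mi = Metadata('A New Title', ['First Author', 'Second Author'])
    mi.tags = ['fiction']
    with open('book.fb2', 'r+b') as stream:
        set_metadata(stream, mi)  # rewrites the <description> in place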
@ -18,7 +18,7 @@ from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
        fixauthors)
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date
from calibre.utils.date import parse_only_date
from calibre.utils.localization import canonicalize_lang

class Worker(Thread): # Get details {{{
@ -471,7 +471,7 @@ class Worker(Thread): # Get details {{{
            ans = x.tail
            date = ans.rpartition('(')[-1].replace(')', '').strip()
            date = self.delocalize_datestr(date)
            return parse_date(date, assume_utc=True)
            return parse_only_date(date, assume_utc=True)

    def parse_language(self, pd):
        for x in reversed(pd.xpath(self.language_xpath)):
@ -6,7 +6,6 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
__docformat__ = 'restructuredtext en'

import re
import datetime
from urllib import quote_plus
from Queue import Queue, Empty

@ -14,6 +13,7 @@ from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_only_date

class Ozon(Source):
    name = 'OZON.ru'
@ -454,9 +454,7 @@ def toPubdate(log, yearAsString): # {{{
    res = None
    if yearAsString:
        try:
            year = int(yearAsString)
            # only year is available, so use 1-st of Jan
            res = datetime.datetime(year, 1, 1)
            res = parse_only_date(yearAsString)
        except:
            log.error('cannot parse to date %s'%yearAsString)
    return res
@ -306,10 +306,15 @@ class MOBIHeader(object): # {{{
            self.extra_data_flags = 0
        if self.has_extra_data_flags:
            self.unknown4 = self.raw[184:192]
            self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
                    self.raw, 192)
            if self.fdst_count <= 1:
                self.fdst_idx = NULL_INDEX
            if self.file_version < 8:
                self.first_text_record, self.last_text_record = \
                        struct.unpack_from(b'>HH', self.raw, 192)
                self.fdst_count, = struct.unpack_from(b'>L', self.raw, 196)
            else:
                self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
                        self.raw, 192)
                if self.fdst_count <= 1:
                    self.fdst_idx = NULL_INDEX
            (self.fcis_number, self.fcis_count, self.flis_number,
                    self.flis_count) = struct.unpack(b'>IIII',
                            self.raw[200:216])
@ -409,7 +414,11 @@ class MOBIHeader(object): # {{{
        a('DRM Flags: %r'%self.drm_flags)
        if self.has_extra_data_flags:
            a('Unknown4: %r'%self.unknown4)
            r('FDST Index', 'fdst_idx')
            if hasattr(self, 'first_text_record'):
                a('First content record: %d'%self.first_text_record)
                a('Last content record: %d'%self.last_text_record)
            else:
                r('FDST Index', 'fdst_idx')
            a('FDST Count: %d'% self.fdst_count)
            r('FCIS number', 'fcis_number')
            a('FCIS count: %d'% self.fcis_count)
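The layout being handled: for file_version < 8, bytes 192-195 hold the first
and last content record numbers and the FDST count sits at offset 196, while
KF8 stores the FDST index and count as two big-endian longs at 192. A
standalone sketch of the two decodings (raw below is a made-up buffer):

    import struct

    def decode_fdst(raw, file_version):
        if file_version < 8:
            first, last = struct.unpack_from(b'>HH', raw, 192)
            fdst_count, = struct.unpack_from(b'>L', raw, 196)
            return ('records', first, last, fdst_count)
        fdst_idx, fdst_count = struct.unpack_from(b'>LL', raw, 192)
        return ('fdst', fdst_idx, fdst_count)

    raw = b'\0' * 192 + struct.pack(b'>HH', 1, 42) + struct.pack(b'>L', 7)
    print (decode_fdst(raw, 6))  # ('records', 1, 42, 7)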
@ -159,7 +159,7 @@ class NCXIndex(Index):

        if self.table is not None:
            NCXEntry = namedtuple('NCXEntry', 'index start length depth parent '
                    'first_child last_child title pos_fid')
                    'first_child last_child title pos_fid kind')

            for num, x in enumerate(self.table.iteritems()):
                text, tag_map = x
@ -192,7 +192,7 @@ class NCXIndex(Index):
                    length=e['len'], depth=e['hlvl'], parent=refindx(e,
                        'parent'), first_child=refindx(e, 'child1'),
                    last_child=refindx(e, 'childn'), title=e['text'],
                    pos_fid=e['pos_fid'])
                    pos_fid=e['pos_fid'], kind=e['kind'])
                self.records.append(entry)
@ -189,11 +189,11 @@ class MOBIFile(object):
    def read_tbs(self):
        from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC,
                collect_indexing_data, encode_strands_as_sequences,
                sequences_to_bytes)
                sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
        entry_map = []
        for index in self.ncx_index:
            vals = list(index)[:-1] + [None, None, None, None]
            entry_map.append(Entry(*vals))
            entry_map.append(Entry(*(vals[:12])))


        indexing_data = collect_indexing_data(entry_map, list(map(len,
@ -206,6 +206,14 @@ class MOBIFile(object):
        the start of the text record.

        ''')]

        tbs_type = 8
        try:
            calculate_all_tbs(indexing_data)
        except NegativeStrandIndex:
            calculate_all_tbs(indexing_data, tbs_type=5)
            tbs_type = 5

        for i, strands in enumerate(indexing_data):
            rec = self.text_records[i]
            tbs_bytes = rec.trailing_data.get('indexing', b'')
@ -236,8 +244,12 @@ class MOBIFile(object):
                    desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1]))
            if tbs_bytes:
                desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
            calculated_sequences = encode_strands_as_sequences(strands)
            calculated_bytes = sequences_to_bytes(calculated_sequences)
            calculated_sequences = encode_strands_as_sequences(strands,
                    tbs_type=tbs_type)
            try:
                calculated_bytes = sequences_to_bytes(calculated_sequences)
            except:
                calculated_bytes = b'failed to calculate tbs bytes'
            if calculated_bytes != otbs:
                print ('WARNING: TBS mismatch for record %d'%i)
                desc.append('WARNING: TBS mismatch!')
@@ -45,6 +45,10 @@ class EXTHHeader(object): # {{{
                 elif idx == 202:
                     self.thumbnail_offset, = struct.unpack('>L', content)
                 elif idx == 501:
+                    try:
+                        self.cdetype = content.decode('ascii')
+                    except UnicodeDecodeError:
+                        self.cdetype = None
                     # cdetype
                     if content == b'EBSP':
                         if not self.mi.tags:
@@ -109,8 +113,11 @@ class EXTHHeader(object): # {{{
                         self.mi.isbn = raw
                     except:
                         pass
-                elif idx == 113:
-                    pass # ASIN or UUID
+                elif idx == 113: # ASIN or other id
+                    try:
+                        self.uuid = content.decode('ascii')
+                    except:
+                        self.uuid = None
                 elif idx == 116:
                     self.start_offset, = struct.unpack(b'>L', content)
                 elif idx == 121:
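Both new EXTH branches follow the same defensive pattern: the record payload should be a short ASCII string, and undecodable bytes must not abort parsing of the whole header. A minimal sketch (the helper name is invented; `content` is the raw EXTH record payload):

    def decode_ascii_or_none(content):
        # Used for EXTH 501 (cdetype) and 113 (ASIN/other id) above
        try:
            return content.decode('ascii')
        except UnicodeDecodeError:
            return None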
@@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
             continue
 
         if not isinstance(flow, unicode):
-            flow = flow.decode(mr.header.codec)
+            try:
+                flow = flow.decode(mr.header.codec)
+            except UnicodeDecodeError:
+                log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
+                flow = flow.decode(mr.header.codec, 'replace')
 
         # links to raster image files from image tags
         # image_pattern
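The pattern above, isolated: decode strictly first, and only on failure log an error and decode again with 'replace', so valid flows are untouched while books with bad bytes still convert instead of crashing. A sketch, keeping the Python 2 style bytes/unicode split of the surrounding code:

    def decode_flow(flow, codec, log):
        if isinstance(flow, bytes):
            try:
                flow = flow.decode(codec)
            except UnicodeDecodeError:
                log.error('Flow part has invalid %s encoded bytes' % codec)
                # Substitutes U+FFFD for undecodable bytes instead of failing
                flow = flow.decode(codec, 'replace')
        return flow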
@@ -207,9 +207,9 @@ class Mobi8Reader(object):
                 fname = 'svgimg' + nstr + '.svg'
             else:
                 # search for CDATA and if exists inline it
-                if flowpart.find('[CDATA[') >= 0:
+                if flowpart.find(b'[CDATA[') >= 0:
                     typ = 'css'
-                    flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
+                    flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
                     format = 'inline'
                     dir = None
                     fname = None
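The b'' prefixes matter because `flowpart` is a byte string at this point and the module uses `unicode_literals`: without them, the find() argument and the concatenated wrapper would be unicode, mixing types. A small check of the byte-safe path (the CSS content is invented):

    flowpart = b'/*<![CDATA[*/ p { margin: 0 } /*]]>*/'
    if flowpart.find(b'[CDATA[') >= 0:
        # Wrap the CSS flow in an inline <style> tag, staying in bytes
        flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'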
@@ -31,6 +31,10 @@ def do_explode(path, dest):
     with CurrentDir(dest):
         mr = Mobi8Reader(mr, default_log)
         opf = os.path.abspath(mr())
+        try:
+            os.remove('debug-raw.html')
+        except:
+            pass
 
     return opf
 
@@ -52,7 +56,10 @@ def explode(path, dest, question=lambda x:True):
     kf8_type = header.kf8_type
 
     if kf8_type is None:
-        raise BadFormat('This MOBI file does not contain a KF8 format book')
+        raise BadFormat(_('This MOBI file does not contain a KF8 format '
+            'book. KF8 is the new format from Amazon. calibre can '
+            'only tweak MOBI files that contain KF8 books. Older '
+            'MOBI files without KF8 are not tweakable.'))
 
     if kf8_type == 'joint':
         if not question(_('This MOBI file contains both KF8 and '
@@ -64,6 +71,14 @@ def explode(path, dest, question=lambda x:True):
     return fork_job('calibre.ebooks.mobi.tweak', 'do_explode', args=(path,
         dest), no_output=True)['result']
 
+def set_cover(oeb):
+    if 'cover' not in oeb.guide or oeb.metadata['cover']: return
+    cover = oeb.guide['cover']
+    if cover.href in oeb.manifest.hrefs:
+        item = oeb.manifest.hrefs[cover.href]
+        oeb.metadata.clear('cover')
+        oeb.metadata.add('cover', item.id)
+
 def do_rebuild(opf, dest_path):
     plumber = Plumber(opf, dest_path, default_log)
     plumber.setup_options()
@@ -72,6 +87,7 @@ def do_rebuild(opf, dest_path):
 
     plumber.opts.mobi_passthrough = True
     oeb = create_oebbook(default_log, opf, plumber.opts)
+    set_cover(oeb)
     outp.convert(oeb, dest_path, inp, plumber.opts, default_log)
 
 def rebuild(src_dir, dest_path):
@@ -79,6 +95,8 @@ def rebuild(src_dir, dest_path):
     if not opf:
         raise ValueError('No OPF file found in %s'%src_dir)
     opf = opf[0]
+    # For debugging, uncomment the following line
+    # def fork_job(a, b, args=None, no_output=True): do_rebuild(*args)
     fork_job('calibre.ebooks.mobi.tweak', 'do_rebuild', args=(opf, dest_path),
         no_output=True)
 
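A hypothetical end-to-end use of the tweak pipeline defined above (the module path appears in the fork_job calls; the file paths are invented for illustration, and `question` here just auto-confirms the joint KF8+MOBI prompt):

    from calibre.ebooks.mobi.tweak import explode, rebuild

    # Unpack the KF8 book into editable OEB files; returns the OPF path
    opf = explode('/tmp/book.azw3', '/tmp/exploded', question=lambda msg: True)
    # ... hand-edit the exploded files, then reassemble:
    rebuild('/tmp/exploded', '/tmp/tweaked.azw3')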
@@ -382,6 +382,7 @@ class MobiWriter(object):
         first_image_record = len(self.records)
         self.resources.serialize(self.records, used_images)
         resource_record_count = len(self.records) - old
+        last_content_record = len(self.records) - 1
 
         # FCIS/FLIS (Seems to serve no purpose)
         flis_number = len(self.records)
@@ -406,7 +407,7 @@ class MobiWriter(object):
         # header
         header_fields['first_resource_record'] = first_image_record
         header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
-        header_fields['fdst_record'] = NULL_INDEX
+        header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
         header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
         header_fields['flis_record'] = flis_number
         header_fields['fcis_record'] = fcis_number
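The replaced line exploits the fact that in a MOBI 6 header the four bytes that would hold the FDST record index are instead read as two big-endian two-byte fields, the first and last content record (see the MOBIHeader comment added at the end of this commit). A round-trip sketch with an invented record count:

    from struct import pack, unpack

    field = pack(b'>HH', 1, 57)            # first and last content record
    assert unpack(b'>HH', field) == (1, 57)
    assert len(field) == 4                 # fills the 4-byte fdst_record slot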
25 src/calibre/ebooks/mobi/writer8/cleanup.py Normal file
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.oeb.base import XPath
+
+class CSSCleanup(object):
+
+    def __init__(self, log, opts):
+        self.log, self.opts = log, opts
+
+    def __call__(self, item, stylizer):
+        if not hasattr(item.data, 'xpath'): return
+
+        # The Kindle touch displays all black pages if the height is set on
+        # body
+        for body in XPath('//h:body')(item.data):
+            style = stylizer.style(body)
+            style.drop('height')
+
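The call site for CSSCleanup is not part of this diff; a hypothetical wiring, assuming it is applied per spine item together with that item's computed stylizer, like other OEB transforms (`stylizer_for` is an invented mapping):

    cleanup = CSSCleanup(default_log, opts)
    for item in oeb.spine:
        # Drops any CSS height set on <body>, which blacks out Kindle Touch pages
        cleanup(item, stylizer_for[item])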
@@ -56,7 +56,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
                     items][:1]
             else:
                 creators = [unicode(c) for c in items]
-                items = ['; '.join(creators)]
+                items = creators
         for item in items:
             data = unicode(item)
             if term != 'description':
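The effect of this change, assuming a book with two authors (names invented): previously a single joined string became one EXTH creator record; now each author becomes its own record.

    creators = [u'Jane Doe', u'John Roe']
    # before: items = ['; '.join(creators)]  -> one record: u'Jane Doe; John Roe'
    # after:  items = creators               -> two records, one per author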
@@ -316,6 +316,9 @@ class NCXIndex(Index):
             desc = entry.get('description', None)
             if desc:
                 strings.append(desc)
+            kind = entry.get('kind', None)
+            if kind:
+                strings.append(kind)
         self.cncx = CNCX(strings)
 
         def to_entry(x):
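The 'kind' string is handled exactly like 'description': it is pooled into the CNCX (the compiled NCX string table), and the index entry stores the offset that the CNCX hands back, which is what the `to_entry()` lookup in the next hunk does via `self.cncx[x[f]]`. A sketch of that round trip, assuming CNCX's mapping behaviour matches its use here:

    strings = ['Chapter One', 'chapter']   # labels, descriptions, kinds, ...
    cncx = CNCX(strings)
    kind_offset = cncx['chapter']          # offset written under the 'kind' tag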
@@ -324,7 +327,7 @@ class NCXIndex(Index):
                     'first_child', 'last_child'):
                 if f in x:
                     ans[f] = x[f]
-            for f in ('label', 'description', 'author'):
+            for f in ('label', 'description', 'author', 'kind'):
                 if f in x:
                     ans[f] = self.cncx[x[f]]
             return ('%02x'%x['index'], ans)
@@ -333,3 +336,20 @@ class NCXIndex(Index):
 
 
 
+class NonLinearNCXIndex(NCXIndex):
+    control_byte_count = 2
+    tag_types = tuple(map(TagMeta, (
+        ('offset', 1, 1, 1, 0),
+        ('length', 2, 1, 2, 0),
+        ('label', 3, 1, 4, 0),
+        ('depth', 4, 1, 8, 0),
+        ('kind', 5, 1, 16, 0),
+        ('parent', 21, 1, 32, 0),
+        ('first_child', 22, 1, 64, 0),
+        ('last_child', 23, 1, 128, 0),
+        EndTagTable,
+        ('pos_fid', 6, 2, 1, 0),
+        EndTagTable
+    )))
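Reading the tuples above: each appears to be a TagMeta of the form (name, tag number, values per entry, bitmask, end flag), an assumption based on how these Index classes use TagMeta elsewhere. So 'kind' is tag 5 with one value, signalled by bit 0x10 in the first control byte, and control_byte_count is 2 because 'pos_fid' sits in a second tag table after the first EndTagTable.

    from collections import namedtuple

    # Assumed field layout for the tag metadata used by these Index classes
    TagMeta = namedtuple('TagMeta', 'name number values_per_entry bitmask end_flag')
    kind = TagMeta('kind', 5, 1, 16, 0)   # bit 0x10 in control byte 1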
@@ -25,7 +25,7 @@ from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
 from calibre.ebooks.oeb.parse_utils import barename
 from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
 from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
-        ChunkIndex, GuideIndex)
+        ChunkIndex, GuideIndex, NonLinearNCXIndex)
 from calibre.ebooks.mobi.writer8.mobi import KF8Book
 from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
 from calibre.ebooks.mobi.writer8.toc import TOCAdder
@@ -314,11 +314,10 @@ class KF8Writer(object):
             return
 
         # Flatten the ToC into a depth first list
-        fl = toc.iter() if is_periodical else toc.iterdescendants()
+        fl = toc.iterdescendants()
         for i, item in enumerate(fl):
-            entry = {'id': id(item), 'index': i, 'href':item.href,
-                    'label':(item.title or _('Unknown')),
-                    'children':[]}
+            entry = {'id': id(item), 'index': i, 'label':(item.title or
+                _('Unknown')), 'children':[]}
             entry['depth'] = getattr(item, 'ncx_hlvl', 0)
             p = getattr(item, 'ncx_parent', None)
             if p is not None:
@@ -333,14 +332,45 @@ class KF8Writer(object):
             if item.description:
                 entry['description'] = item.description
             entries.append(entry)
+            href = item.href or ''
+            href, frag = href.partition('#')[0::2]
+            aid = self.id_map.get((href, frag), None)
+            if aid is None:
+                aid = self.id_map.get((href, ''), None)
+            if aid is None:
+                pos, fid = 0, 0
+                chunk = self.chunk_table[pos]
+                offset = chunk.insert_pos + fid
+            else:
+                pos, fid, offset = self.aid_offset_map[aid]
+
+            entry['pos_fid'] = (pos, fid)
+            entry['offset'] = offset
 
         # The Kindle requires entries to be sorted by (depth, playorder)
-        entries.sort(key=lambda entry: (entry['depth'], entry['index']))
+        # However, I cannot figure out how to deal with non linear ToCs, i.e.
+        # ToCs whose nth entry at depth d has an offset after its n+k entry at
+        # the same depth, so we sort on (depth, offset) instead. This re-orders
+        # the ToC to be linear. A non-linear ToC causes section to section
+        # jumping to not work. kindlegen somehow handles non-linear tocs, but I
+        # cannot figure out how.
+        original = sorted(entries,
+                key=lambda entry: (entry['depth'], entry['index']))
+        linearized = sorted(entries,
+                key=lambda entry: (entry['depth'], entry['offset']))
+        is_non_linear = original != linearized
+        entries = linearized
+        is_non_linear = False # False as we are using the linearized entries
+
+        if is_non_linear:
+            for entry in entries:
+                entry['kind'] = 'chapter'
 
         for i, entry in enumerate(entries):
             entry['index'] = i
         id_to_index = {entry['id']:entry['index'] for entry in entries}
 
-        # Write the hierarchical and start offset information
+        # Write the hierarchical information
         for entry in entries:
             children = entry.pop('children')
             if children:
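The linearity test buried in that hunk, in isolation: a ToC is non-linear exactly when play order (index) and reading position (offset) disagree within a depth level. Note that the commit then assigns `is_non_linear = False` unconditionally, discarding the result, since the entries have already been re-sorted into linear order:

    original = sorted(entries, key=lambda e: (e['depth'], e['index']))
    linearized = sorted(entries, key=lambda e: (e['depth'], e['offset']))
    is_non_linear = original != linearized   # overridden to False just below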
@@ -348,19 +378,6 @@ class KF8Writer(object):
                 entry['last_child'] = id_to_index[children[-1]]
             if 'parent_id' in entry:
                 entry['parent'] = id_to_index[entry.pop('parent_id')]
-            href = entry.pop('href')
-            href, frag = href.partition('#')[0::2]
-            aid = self.id_map.get((href, frag), None)
-            if aid is None:
-                aid = self.id_map.get((href, ''), None)
-            if aid is None:
-                pos, fid = 0, 0
-            else:
-                pos, fid = self.aid_offset_map[aid]
-            chunk = self.chunk_table[pos]
-            offset = chunk.insert_pos + fid
-            entry['pos_fid'] = (pos, fid)
-            entry['offset'] = offset
 
         # Write the lengths
         def get_next_start(entry):
@@ -369,13 +386,13 @@ class KF8Writer(object):
             if enders:
                 return min(enders)
             return len(self.flows[0])
 
         for entry in entries:
            entry['length'] = get_next_start(entry) - entry['offset']
 
         self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
                 self.uncompressed_record_lengths)
-        self.ncx_records = NCXIndex(entries)()
+        idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
+        self.ncx_records = idx_type(entries)()
 
     def create_guide(self):
         self.start_offset = None
@@ -389,12 +406,9 @@ class KF8Writer(object):
             aid = self.id_map.get((href, ''))
             if aid is None:
                 continue
-            pos, fid = self.aid_offset_map[aid]
+            pos, fid, offset = self.aid_offset_map[aid]
             if is_guide_ref_start(ref):
-                chunk = self.chunk_table[pos]
-                skel = [s for s in self.skel_table if s.file_number ==
-                        chunk.file_number][0]
-                self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
+                self.start_offset = offset
             self.guide_table.append(GuideRef(ref.title or
                 _('Unknown'), ref.type, (pos, fid)))
 
@@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
     unknown2 = zeroes(8)
 
     # 192: FDST
+    # In MOBI 6 the fdst record is instead two two byte fields storing the
+    # index of the first and last content records
     fdst_record = DYN
     fdst_count = DYN
 
Some files were not shown because too many files have changed in this diff.