merge from trunk

commit d82f376ccd
Author: Lee
Date: 2012-05-22 22:57:21 +08:00
260 changed files with 96822 additions and 68623 deletions

View File

@@ -16,7 +16,6 @@ resources/ebook-convert-complete.pickle
 resources/builtin_recipes.xml
 resources/builtin_recipes.zip
 resources/template-functions.json
-resources/display/*.js
 setup/installer/windows/calibre/build.log
 src/calibre/translations/.errors
 src/cssutils/.svn/

View File

@@ -19,6 +19,210 @@
 # new recipes:
 #  - title:
- version: 0.8.52
date: 2012-05-18
new features:
- title: "EPUB Input: When setting the cover for a book that identifies its cover image, but not the html wrapper around the cover, try to detect and remove that wrapper automatically."
tickets: [ 999959 ]
- title: "When deleting books of a specific format, show the number of books with each format available"
- title: "Linux install: No longer create MAN pages as all utilities have more comprehensive command line --help anyway"
- title: "Add a tweak Preferences->Tweaks to control the default choice of format for the Tweak Book feature"
- title: "Conversion: Allow setting negative page margins. A negative page margin means that calibre will not specify any page margin in the output document (for formats that support this)"
bug fixes:
- title: "Tweak book: Fix handling of covers when tweaking KF8 books"
- title: "KF8 Output: Handle input documents with out of sequence ToC entries. Note that currently section jumping in the KF8 output produced by calibre for such files does not work."
tickets: [1000493]
- title: "Edit metadata dialog: Fix the edit values button for custom tag-like columns showing a unneeded warning about changed values"
- title: "EPUB Output: Be a little more conservative when removing <form> tags. Only remove them if they have actual forms inside. "
tickets: [ 1000384 ]
- title: "EPUB Input: Correctly update the Cover entry in the ToC even when the entry has a fragment reference. "
tickets: [ 999973 ]
- title: "Update ImagMagick DLLs in all calibre binary builds to fix security vulnerabilities in ImageMagick"
tickets: [ 999496 ]
- title: "Advanced search dialog: Fix equals and regex matching not being applied for custom column searches."
tickets: [ 980221 ]
- title: "RTF Input: Handle old RTF files that have commands without braces."
tickets: [ 994133 ]
- title: "Get Books: Diesel, fix results not showing when only a single match is found"
- title: "Get Books: Fix DRM status indicators for Kobo and Diesel stores. Fix smashwords not returning results."
tickets: [ 993755 ]
- title: "Fix regression in 0.8.51 that broke viewing of LIT and some EPUB files"
tickets: [998248, 998216]
improved recipes:
- Clarin
- Spiegel
- Spiegel International
- Montreal Gazette
- Gosc Niedzielny
- Ars Technica
new recipes:
- title: "Army/Navy/Air force/Marine Times and News busters"
author: jde
- title: "Ads of the World, Heavy Meta (Italian) and Juve La Stampa"
author: faber1971
- title: "Revista Summa"
author: Vakya
- title: "Strategic culture"
author: Darko Miletic
- title: Stars and Stripes
author: adoucette
- title: Nachdenkseiten
author: jrda
- version: 0.8.51
date: 2012-05-11
new features:
- title: "When switching libraries preserve the position and selected books if you switch back to a previously opened library."
tickets: [994514]
- title: "Conversion pipeline: Filter out the useless font-face rules inserted by Microsoft Word for every font on the system"
- title: "Driver for Motorola XT875 and Pandigital SuperNova"
tickets: [996890]
- title: "Add a colour swatch the the dialog for creating column coloring rules, to ease selection of colors"
tickets: [994811]
- title: "EPUB Output: Consolidate internal CSS generated by calibre into external stylesheets for ease of editing the EPUB"
- title: "List EPUB and MOBI at the top of the dropdown list fo formats to convert to, as they are the most common choices"
tickets: [994838]
bug fixes:
- title: "E-book viewer: Improve performance when switching between normal and fullscreen views."
tickets: [996102]
- title: "Edit metadata dialog: When running download metadata do not insert duplicate tags into the list of tags"
- title: "KF8 Input: Do not error out if the file has a few invalidly encoded bytes."
tickets: [997034]
- title: "Fix download of news in AZW3 format not working"
tickets: [996439]
- title: "Pocketbook driver: Update for new PB 611 firmware."
tickets: [903079]
- title: "ebook-convert: Error out if the user prvides extra command line args instead of silently ignoring them"
tickets: [994939]
- title: "EPUB Output: Do not self close any container tags to prevent artifacts when EPUBs are viewed using buggy browser based viewers."
tickets: [994861]
- title: "Fix regression in 0.8.50 that broke the conversion of HTML files that contained non-ascii font-face declarations, typically produced by Microsoft Word"
improved recipes:
- Mainichi news
- derStandard
- Endgadget Japan
new recipes:
- title: Mainichi English
author: Hiroshi Miura
- title: The Grid TO
author: Yusuf W
- title: National Geographic (Italy)
author: faber1971
- title: Rebelion
author: Marc Busque
- version: 0.8.50
date: 2012-05-04
new features:
- title: "Tweak Book: Allow tweaking of KF8 MOBI files. Useful to fine-tune the result of a conversion. Right click on the book and select Tweak Book to use the feature. Note that tweaking a MOBI file that contains both KF8 and older MOBI6 will cause the MOBI6 version to be discarded."
- title: "AZW3 output plugin. This output plugin generates pure KF8 mobi files. These only work on the Kindle Fire and Kindle Touch with latest firmware."
- title: "Conversion: Allow easy re-ordering of the search and replace expressions in the conversion dialog. Also apply the expressions in the same order that they were entered when doing the conversion."
- title: "Automatically add the Tag 'Sample Book' when an Amazon sample is added to calibre"
- title: "FB2 Input: Better handling of inline images."
tickets: [989869]
bug fixes:
- title: "KF8 Output: Fix section to section jumps not working for documents with multi-level ToCs"
- title: "EPUB Input: Handle the case of the metadata ToC containing a reference to the cover HTML file."
tickets: [993812]
- title: "CHM Input: Handle files with deeply nested markup and non html files listed at the start of the manifest."
tickets: [993607]
- title: "KF8 Output: Workaround Kindle Touch bug that causes the book to be rendered as black pages when a height is specified for <body>"
- title: "Fix regression in 0.8.49 that broke italics detection in heuristic processing on 32-bit systems."
tickets: [991380]
- title: "KF8 Output: Fix joint MOBI6/KF8 books not being recognized as MOBI files by older Kindles"
- title: "KF8 Output: Fix errors when processing documents with HTML comments and/or XML processing instructions"
- title: "Get Books: Amazon fix prices not being found. B&N fix details link. ebooks.com: fix cover image. Website changes to various EU stores"
- title: "FB2 Input: More robust base64 decoding to handle embedded images that are incorrectly encoded."
tickets: [990929]
- title: "Fix scrolling with the cover browser updating only the selection in the book list, not the current book."
tickets: [990881]
- title: "Save to Disk: Do not run out memory when saving very large files on systems with low RAM."
tickets: [990741]
- title: "FB2 Output: Use 2 letter language codes in preference to 3-letter ones to not break poorly implemented FB2 readers"
tickets: [990026]
- title: "EPUB Input: Auto set the media-type for OPF manifest entries with an empty media-type"
improved recipes:
- National Post
- Daily Mirror
- Sun
- Newsweek Polska
- Max-Planck
- derStandard
- tweakers.net
new recipes:
- title: George Monbiot
author: Darko Miletic
- title: El Mundo
author: atordo
- title: AraInfo and Diagonal
author: Ruben Pollan
- version: 0.8.49
  date: 2012-04-27

View File

@@ -0,0 +1,26 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336986047(BasicNewsRecipe):
title = u'Ads of the World'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = False
description = 'The best international advertising campaigns'
language = 'en'
__author__ = 'faber1971'
no_stylesheets = True
keep_only_tags = [
dict(name='div', attrs={'id':'primary'})
]
remove_tags = [
dict(name='ul', attrs={'class':'links inline'})
,dict(name='div', attrs={'class':'form-item'})
,dict(name='div', attrs={'id':['options', 'comments']})
,dict(name='ul', attrs={'id':'nodePager'})
]
reverse_article_order = True
masthead_url = 'http://bigcatgroup.co.uk/files/2011/01/05-ads-of-the-world.png'
feeds = [(u'Ads of the world', u'http://feeds.feedburner.com/adsoftheworld-latest')]
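
All the user recipes added in this commit follow the same BasicNewsRecipe shape, so the pattern is worth spelling out once. A minimal sketch (the feed URL and the tag filters below are placeholders for illustration, not a real recipe):

from calibre.web.feeds.news import BasicNewsRecipe

class MinimalRecipe(BasicNewsRecipe):
    title = u'Minimal Recipe'       # name shown inside calibre
    language = 'en'
    oldest_article = 7              # ignore feed items older than a week
    max_articles_per_feed = 100
    no_stylesheets = True           # drop the site's own CSS
    # keep_only_tags prunes each article page down to the matching subtree;
    # remove_tags then deletes unwanted nodes inside what is left.
    keep_only_tags = [dict(name='div', attrs={'id':'content'})]    # placeholder id
    remove_tags = [dict(name='div', attrs={'class':'comments'})]   # placeholder class
    feeds = [(u'Example', u'http://example.com/rss.xml')]          # placeholder URL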

View File

@@ -0,0 +1,43 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AirForceTimes(BasicNewsRecipe):
title = 'Air Force Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Air Force'
language = 'en'
publisher = 'AirForceTimes.com'
category = 'news, U.S. Air Force'
tags = 'news, U.S. Air Force'
cover_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
masthead_url = 'http://www.airforcetimes.com/images/logo_airforcetimes_alert.jpg'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.airforcetimes.com/rss_news.php'),
('Benefits', 'http://www.airforcetimes.com/rss_benefits.php'),
('Money', 'http://www.airforcetimes.com/rss_money.php'),
('Careers & Education', 'http://www.airforcetimes.com/rss_careers.php'),
('Community', 'http://www.airforcetimes.com/rss_community.php'),
('Off Duty', 'http://www.airforcetimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.airforcetimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.airforcetimes.com/rss_guard.php'),
]
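
The Air Force/Army/Marine Corps/Military Times recipes in this commit differ only in branding and host name; each site exposes the same rss_<section>.php endpoints. They are kept as four separate files, but the shared structure could in principle be generated. A hypothetical sketch (make_times_recipe and the simplified section labels are illustrative, not part of calibre):

from calibre.web.feeds.news import BasicNewsRecipe

SECTIONS = ['news', 'benefits', 'money', 'careers',
            'community', 'off_duty', 'entertainment', 'guard']

def make_times_recipe(title, host):
    # Only the title and the host differ between the four recipes.
    attrs = {
        'title': title,
        '__author__': 'jde',
        'language': 'en',
        'oldest_article': 7,
        'max_articles_per_feed': 25,
        'auto_cleanup': True,
        'feeds': [(s.replace('_', ' ').title(),
                   'http://www.%s/rss_%s.php' % (host, s)) for s in SECTIONS],
    }
    return type(title.replace(' ', ''), (BasicNewsRecipe,), attrs)

AirForceTimes = make_times_recipe('Air Force Times', 'airforcetimes.com')

In practice calibre loads one recipe class per .recipe file, which is why the four near-identical files exist.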

recipes/army_times.recipe Normal file
View File

@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ArmyTimes(BasicNewsRecipe):
title = 'Army Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Army'
language = 'en'
publisher = 'ArmyTimes.com'
category = 'news, U.S. Army'
tags = 'news, U.S. Army'
cover_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
masthead_url = 'http://www.armytimes.com/images/logo_armytimes_alert.jpg'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.armytimes.com/rss_news.php'),
('Benefits', 'http://www.armytimes.com/rss_benefits.php'),
('Money', 'http://www.armytimes.com/rss_money.php'),
('Careers & Education', 'http://www.armytimes.com/rss_careers.php'),
('Community', 'http://www.armytimes.com/rss_community.php'),
('Off Duty', 'http://www.armytimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.armytimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.armytimes.com/rss_guard.php'),
]

View File

@@ -1,33 +1,34 @@
 __license__ = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 arstechnica.com
 '''
-import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class ArsTechnica(BasicNewsRecipe):
     title = u'Ars Technica'
     language = 'en'
     __author__ = 'Darko Miletic, Sujata Raman, Alexis Rohou'
-    description = 'The art of technology'
-    publisher = 'Ars Technica'
+    description = 'Ars Technica: Serving the technologist for 1.2 decades'
+    publisher = 'Conde Nast Publications'
     category = 'news, IT, technology'
     oldest_article = 5
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'utf-8'
     use_embedded_content = False
+    remove_empty_feeds = True
+    publication_type = 'newsportal'
     extra_css = '''
-        body {font-family: Arial,Helvetica,sans-serif}
-        .title{text-align: left}
+        body {font-family: Arial,sans-serif}
+        .heading{font-family: "Times New Roman",serif}
         .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none}
-        .news-item-figure-caption-text{font-size:small; font-style:italic}
-        .news-item-figure-caption-byline{font-size:small; font-style:italic; font-weight:bold}
+        img{display: block}
+        .caption-text{font-size:small; font-style:italic}
+        .caption-byline{font-size:small; font-style:italic; font-weight:bold}
     '''
-    ignoreEtcArticles = True # Etc feed items can be ignored, as they're not real stories

     conversion_options = {
         'comments' : description
@@ -36,50 +37,38 @@ class ArsTechnica(BasicNewsRecipe):
         ,'publisher' : publisher
     }

-    #preprocess_regexps = [
-    #    (re.compile(r'<div class="news-item-figure', re.DOTALL|re.IGNORECASE),lambda match: '<div class="news-item-figure"')
-    #    ,(re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
-    #    ]
-    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
+    keep_only_tags = [
+        dict(attrs={'class':'standalone'})
+        ,dict(attrs={'id':'article-guts'})
+    ]

     remove_tags = [
-        dict(name=['object','link','embed'])
-        ,dict(name='div', attrs={'class':'read-more-link'})
+        dict(name=['object','link','embed','iframe','meta'])
+        ,dict(attrs={'class':'corner-info'})
     ]
-    #remove_attributes=['width','height']
+    remove_attributes = ['lang']

     feeds = [
         (u'Infinite Loop (Apple content)', u'http://feeds.arstechnica.com/arstechnica/apple/')
         ,(u'Opposable Thumbs (Gaming content)', u'http://feeds.arstechnica.com/arstechnica/gaming/')
         ,(u'Gear and Gadgets', u'http://feeds.arstechnica.com/arstechnica/gadgets/')
-        ,(u'Chipster (Hardware content)', u'http://feeds.arstechnica.com/arstechnica/hardware/')
         ,(u'Uptime (IT content)', u'http://feeds.arstechnica.com/arstechnica/business/')
         ,(u'Open Ended (Open Source content)', u'http://feeds.arstechnica.com/arstechnica/open-source/')
         ,(u'One Microsoft Way', u'http://feeds.arstechnica.com/arstechnica/microsoft/')
-        ,(u'Nobel Intent (Science content)', u'http://feeds.arstechnica.com/arstechnica/science/')
+        ,(u'Scientific method (Science content)', u'http://feeds.arstechnica.com/arstechnica/science/')
         ,(u'Law & Disorder (Tech policy content)', u'http://feeds.arstechnica.com/arstechnica/tech-policy/')
     ]

-    # This deals with multi-page stories
     def append_page(self, soup, appendtag, position):
-        pager = soup.find('div',attrs={'class':'pager'})
+        pager = soup.find(attrs={'class':'numbers'})
         if pager:
-            for atag in pager.findAll('a',href=True):
-                str = self.tag_to_string(atag)
-                if str.startswith('Next'):
-                    nurl = 'http://arstechnica.com' + atag['href']
+            nexttag = pager.find(attrs={'class':'next'})
+            if nexttag:
+                nurl = nexttag.parent['href']
                 rawc = self.index_to_soup(nurl,True)
                 soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
-                readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
-                if readmoretag:
-                    readmoretag.extract()
-                texttag = soup2.find('div', attrs={'class':'body'})
-                for it in texttag.findAll(style=True):
-                    del it['style']
+                texttag = soup2.find(attrs={'id':'article-guts'})
                 newpos = len(texttag.contents)
                 self.append_page(soup2,texttag,newpos)
                 texttag.extract()
@@ -88,41 +77,24 @@ class ArsTechnica(BasicNewsRecipe):

     def preprocess_html(self, soup):
-        # Adds line breaks near the byline (not sure why this is needed)
-        ftag = soup.find('div', attrs={'class':'byline'})
-        if ftag:
-            brtag = Tag(soup,'br')
-            brtag2 = Tag(soup,'br')
-            ftag.insert(4,brtag)
-            ftag.insert(5,brtag2)
-        # Remove style items
-        for item in soup.findAll(style=True):
-            del item['style']
-        # Remove id
-        for item in soup.findAll(id=True):
-            del item['id']
-        # For some reason, links to authors don't have the domainname
-        a_author = soup.find('a',{'href':re.compile("^/author")})
-        if a_author:
-            a_author['href'] = 'http://arstechnica.com'+a_author['href']
-        # within div class news-item-figure, we need to grab images
-        # Deal with multi-page stories
         self.append_page(soup, soup.body, 3)
-        for item in soup.findAll('a'):
-            limg = item.find('img')
-            if item.string is not None:
-                str = item.string
-                item.replaceWith(str)
-            else:
-                if limg:
-                    item.name = 'div'
-                    item.attrs = []
-                else:
-                    str = self.tag_to_string(item)
-                    item.replaceWith(str)
-        for item in soup.findAll('img'):
-            if not item.has_key('alt'):
-                item['alt'] = 'image'
         return soup

-    def get_article_url(self, article):
-        # If the article title starts with Etc:, don't return it
-        if self.ignoreEtcArticles:
-            article_title = article.get('title',None)
-            if re.match('Etc: ',article_title) is not None:
-                return None
-        # The actual article is in a guid tag
-        return article.get('guid', None).rpartition('?')[0]
+    def preprocess_raw_html(self, raw, url):
+        return '<html><head>'+raw[raw.find('</head>'):]
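
The new preprocess_raw_html above deserves a gloss: it throws away everything between <html><head> and </head>, i.e. the site's scripts, styles and meta tags, before the HTML is parsed. A toy illustration of the same slice:

raw = '<html><head><script>junk()</script></head><body><p>story</p></body></html>'
cleaned = '<html><head>' + raw[raw.find('</head>'):]
# cleaned == '<html><head></head><body><p>story</p></body></html>'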

View File

@@ -1,6 +1,6 @@
+from __future__ import unicode_literals
 __license__ = 'GPL v3'
-__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
 '''
 clarin.com
 '''
@@ -8,9 +8,9 @@ clarin.com
 from calibre.web.feeds.news import BasicNewsRecipe

 class Clarin(BasicNewsRecipe):
-    title = 'Clarin'
+    title = 'Clarín'
     __author__ = 'Darko Miletic'
-    description = 'Noticias de Argentina y mundo'
+    description = 'Clarin.com. Noticias de la Argentina y el mundo. Información actualizada las 24 horas y en español. Informate ya'
     publisher = 'Grupo Clarin'
     category = 'news, politics, Argentina'
     oldest_article = 2
@@ -26,9 +26,7 @@ class Clarin(BasicNewsRecipe):
     extra_css = """
         body{font-family: Arial,Helvetica,sans-serif}
         h2{font-family: Georgia,serif; font-size: xx-large}
-        .hora{font-weight:bold}
-        .hd p{font-size: small}
-        .nombre-autor{color: #0F325A}
+        .info,.nombre-autor,.hora{font-size: small}
     """

     conversion_options = {
@@ -38,38 +36,35 @@ class Clarin(BasicNewsRecipe):
         , 'language' : language
     }

-    keep_only_tags = [dict(attrs={'class':['hd','mt']})]
-    remove_tags = [dict(name=['meta','base','link'])]
-    remove_attributes = ['lang','_mce_bogus']
+    keep_only_tags = [dict(attrs={'class':['hd','mt','bd']})]
+    remove_tags = [dict(name=['meta','base','link','iframe','embed','object'])]
+    remove_attributes = ['lang']

     feeds = [
         (u'Pagina principal', u'http://www.clarin.com/rss/')
         ,(u'Politica', u'http://www.clarin.com/rss/politica/')
         ,(u'Deportes', u'http://www.clarin.com/rss/deportes/')
+        ,(u'Economia', u'http://www.clarin.com/economia/')
         ,(u'Mundo', u'http://www.clarin.com/rss/mundo/')
-        ,(u'iEco', u'http://www.ieco.clarin.com/rss/')
         ,(u'Espectaculos', u'http://www.clarin.com/rss/espectaculos/')
         ,(u'Sociedad', u'http://www.clarin.com/rss/sociedad/')
         ,(u'Ciudades', u'http://www.clarin.com/rss/ciudades/')
         ,(u'Policiales', u'http://www.clarin.com/rss/policiales/')
         ,(u'Internet', u'http://www.clarin.com/rss/internet/')
-        ,(u'Ciudades', u'http://www.clarin.com/rss/ciudades/')
     ]

-    def get_article_url(self, article):
-        return article.get('guid', None)
-
     def print_version(self, url):
         return url + '?print=1'

+    def get_article_url(self, article):
+        return article.get('guid', None)
+
     def get_cover_url(self):
         cover_url = None
         soup = self.index_to_soup(self.INDEX)
-        cover_item = soup.find('div',attrs={'class':'bb-md bb-md-edicion_papel'})
-        if cover_item:
-            ap = cover_item.find('a',attrs={'href':'/edicion-impresa/'})
-            if ap:
-                cover_url = self.INDEX + ap.img['src']
+        for item in soup.findAll('a', href=True):
+            if item['href'].startswith('/tapas/TAPA_CLA'):
+                cover_url = self.INDEX + item['href']
+                return cover_url
         return cover_url

View File

@@ -1,5 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from claibre import browser
+from calibre import browser
 import re

 class AdvancedUserRecipe1306061239(BasicNewsRecipe):

View File

@@ -7,10 +7,11 @@ __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 ''' http://www.derstandard.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
+from time import strftime

 class DerStandardRecipe(BasicNewsRecipe):
     title = u'derStandard'
-    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira'
+    __author__ = 'Gerhard Aigner and Sujata Raman and Marcel Jira and Peter Reschenhofer'
     description = u'Nachrichten aus Österreich'
     publisher = 'derStandard.at'
     category = 'news, politics, nachrichten, Austria'
@@ -88,3 +89,41 @@ class DerStandardRecipe(BasicNewsRecipe):
         for t in soup.findAll(['ul', 'li']):
             t.name = 'div'
         return soup
    def get_cover_url(self):
        highResolution = True

        date = strftime("%Y/%Y%m%d")
        # it is also possible to use a date in the past
        # date = '2012/20120503'

        urlP1 = 'http://epaper.derstandarddigital.at/'
        urlP2 = 'data_ep/STAN/' + date
        urlP3 = '/V.B1/'
        urlP4 = 'paper.htm'
        urlHTML = urlP1 + urlP2 + urlP3 + urlP4
        br = self.clone_browser(self.browser)
        htmlF = br.open_novisit(urlHTML)
        htmlC = htmlF.read()

        # URL EXAMPLE: data_ep/STAN/2012/20120504/V.B1/pages/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE.htm
        # consists of part2 + part3 + 'pages/' + code
        # 'pages/' has length 6, the code has length 36
        index = htmlC.find(urlP2) + len(urlP2 + urlP3) + 6
        code = htmlC[index:index + 36]

        # URL EXAMPLE HIGH RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/A3B6798F-2751-4D8D-A103-C5EF22F7ACBE_b.png
        # URL EXAMPLE LOW RESOLUTION: http://epaper.derstandarddigital.at/data_ep/STAN/2012/20120504/pagejpg/2AB52F71-11C1-4859-9114-CDCD79BEFDCB.png
        urlPic = urlP1 + urlP2 + '/pagejpg/' + code
        if highResolution:
            urlPic = urlPic + '_b'
        urlPic = urlPic + '.png'
        return urlPic
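
The index arithmetic above leans on 'pages/' being exactly 6 characters and the page id exactly 36. If the page ids are always the 8-4-4-4-12 hex UUIDs seen in the example URLs (an assumption, not something the recipe verifies), a regex would be a less brittle way to pull the code out of htmlC; a sketch:

import re

m = re.search(r'pages/([0-9A-F]{8}(?:-[0-9A-F]{4}){3}-[0-9A-F]{12})\.htm', htmlC)
if m:
    code = m.group(1)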

View File

@@ -0,0 +1,15 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from calibre.web.feeds.news import BasicNewsRecipe
class BasicUserRecipe1337668045(BasicNewsRecipe):
title = u'Drytooling.com.pl'
masthead_url = 'http://drytooling.com.pl/images/drytooling-kindle.png'
cover_url = 'http://drytooling.com.pl/images/drytooling-kindle.png'
description = u'Drytooling.com.pl jest serwisem wspinaczki zimowej, alpinizmu i himalaizmu. Jeśli uwielbiasz zimę, nie możesz doczekać się aż wyciągniesz szpej z szafki i uderzysz w Tatry, Alpy, czy może Himalaje, to znajdziesz tutaj naprawdę dużo interesujących Cię treści! Zapraszamy!'
__author__ = u'Damian Granowski'
oldest_article = 100
max_articles_per_feed = 20
auto_cleanup = True
feeds = [
    (u'Newsy', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=4&format=raw'),
    (u'Artyku\u0142y', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=3&format=raw'),
    (u'Imprezy i zawody', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=5&format=raw'),
    (u'Baza G\xf3rska', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=6&format=raw'),
    (u'Wyprawy', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=7&format=raw'),
    (u'Newsy / alpinizm', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=12&format=raw'),
    (u'Newsy / klasyka zimowa', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=11&format=raw'),
    (u'Newsy / himalaizm', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=10&format=raw'),
    (u'Outdoor', u'http://drytooling.com.pl/index.php?option=com_ninjarsssyndicator&feed_id=8&format=raw'),
]

recipes/economico.recipe Normal file
View File

@@ -0,0 +1,30 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Economico(BasicNewsRecipe):
title = u'Economico'
language = 'pt'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
encoding = 'utf-8'
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [
('Ultima Hora',
'http://economico.sapo.pt/rss/ultimas'),
('Em Foco',
'http://economico.sapo.pt/rss/emfoco'),
('Mercados',
'http://economico.sapo.pt/rss/mercados'),
('Empresas',
'http://economico.sapo.pt/rss/empresas'),
('Economia',
'http://economico.sapo.pt/rss/economia'),
('Politica',
'http://economico.sapo.pt/rss/politica'),
]

View File

@@ -17,7 +17,25 @@ class EndgadgetJapan(BasicNewsRecipe):
     no_stylesheets = True
     language = 'ja'
     encoding = 'utf-8'
-    feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
-    remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
-    remove_tags_after = dict(name='div', attrs={'class':'post_body'})
+    index = 'http://japanese.engadget.com/'
+    remove_javascript = True
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        soup = self.index_to_soup(self.index)
+        for topstories in soup.findAll('div',attrs={'class':'post_content'}):
+            itt = topstories.find('h4')
+            itema = itt.find('a',href=True)
+            newsarticles.append({
+                'title'       :itema.string
+                ,'date'       :''
+                ,'url'        :itema['href']
+                ,'description':''
+            })
+        feeds.append(('Latest Posts', newsarticles))
+        return feeds
+
+    remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
+    remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})

recipes/folha.recipe Normal file
View File

@@ -0,0 +1,82 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.folha.uol.com.br
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Folha_de_s_paulo(BasicNewsRecipe):
title = u'Folha de São Paulo - portal'
__author__ = 'Darko Miletic'
description = 'Um jornal a serviço do Brasil'
publisher = 'Folhapress'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'pt_BR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['meta','link','base','iframe','embed','object'])]
keep_only_tags = [dict(attrs={'id':'articleNew'})]
feeds = [
(u'Poder' , u'http://feeds.folha.uol.com.br/poder/rss091.xml' )
,(u'Mundo' , u'http://feeds.folha.uol.com.br/mundo/rss091.xml' )
,(u'Mercado' , u'http://feeds.folha.uol.com.br/mercado/rss091.xml' )
,(u'Cotidiano' , u'http://feeds.folha.uol.com.br/cotidiano/rss091.xml' )
,(u'Esporte' , u'http://feeds.folha.uol.com.br/esporte/rss091.xml' )
,(u'Ilustrada' , u'http://feeds.folha.uol.com.br/ilustrada/rss091.xml' )
,(u'F5' , u'http://feeds.folha.uol.com.br/f5/rss091.xml' )
,(u'Ciência' , u'http://feeds.folha.uol.com.br/ciencia/rss091.xml' )
,(u'Tec' , u'http://feeds.folha.uol.com.br/tec/rss091.xml' )
,(u'Ambiente' , u'http://feeds.folha.uol.com.br/ambiente/rss091.xml' )
,(u'Bichos' , u'http://feeds.folha.uol.com.br/bichos/rss091.xml' )
,(u'Celebridades' , u'http://feeds.folha.uol.com.br/celebridades/rss091.xml' )
,(u'Comida' , u'http://feeds.folha.uol.com.br/comida/rss091.xml' )
,(u'Equilibrio' , u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml' )
,(u'Folhateen' , u'http://feeds.folha.uol.com.br/folhateen/rss091.xml' )
,(u'Folhinha' , u'http://feeds.folha.uol.com.br/folhinha/rss091.xml' )
,(u'Ilustrissima' , u'http://feeds.folha.uol.com.br/ilustrissima/rss091.xml' )
,(u'Saber' , u'http://feeds.folha.uol.com.br/saber/rss091.xml' )
,(u'Turismo' , u'http://feeds.folha.uol.com.br/turismo/rss091.xml' )
,(u'Panel do Leitor', u'http://feeds.folha.uol.com.br/folha/paineldoleitor/rss091.xml')
,(u'Publifolha' , u'http://feeds.folha.uol.com.br/folha/publifolha/rss091.xml' )
,(u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml' )
]
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
curl = url.partition('/*')[2]
return curl
def print_version(self, url):
return 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)
def get_cover_url(self):
soup = self.index_to_soup('http://www.folha.uol.com.br/')
cont = soup.find('div', attrs={'id':'newspaper'})
if cont:
ai = cont.find('a', href='http://www1.folha.uol.com.br/fsp/')
if ai:
return ai.img['src']
return None
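
The print_version hook above percent-encodes the whole article URL and hands it to Folha's print-formatting service; urllib.quote_plus escapes every reserved character. Roughly (the article URL below is made up for illustration):

import urllib

url = 'http://www1.folha.uol.com.br/poder/123-exemplo.shtml'
print_url = 'http://tools.folha.com.br/print?site=emcimadahora&url=' + urllib.quote_plus(url)
# print_url == 'http://tools.folha.com.br/print?site=emcimadahora&url=http%3A%2F%2Fwww1.folha.uol.com.br%2Fpoder%2F123-exemplo.shtml'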

View File

@@ -8,7 +8,7 @@ from urllib2 import Request, urlopen, URLError
 class FolhaOnline(BasicNewsRecipe):
     THUMBALIZR_API = '' # ---->Get your at http://www.thumbalizr.com/ and put here
     LANGUAGE = 'pt_br'
-    language = 'pt'
+    language = 'pt_BR'
     LANGHTM = 'pt-br'
     ENCODING = 'cp1252'
     ENCHTM = 'iso-8859-1'

View File

@@ -14,7 +14,7 @@ class FSP(BasicNewsRecipe):
     HOMEPAGE = 'http://www1.folha.uol.com.br/fsp/'
     masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
-    language = 'pt'
+    language = 'pt_BR'
     no_stylesheets = True
     max_articles_per_feed = 40
     remove_javascript = True

View File

@@ -6,21 +6,20 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com'

 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
+from datetime import date
 import re

 class GN(BasicNewsRecipe):
     EDITION = 0

     __author__ = 'Piotr Kontek'
+    title = u'Gość niedzielny'
     description = 'Weekly magazine'
     encoding = 'utf-8'
     no_stylesheets = True
     language = 'pl'
     remove_javascript = True
     temp_files = []
-    simultaneous_downloads = 1
-    masthead_url = 'http://gosc.pl/files/11/03/12/949089_top.gif'
-    title = u'Gość niedzielny'

     articles_are_obfuscated = True

@@ -56,22 +55,28 @@ class GN(BasicNewsRecipe):
             self.temp_files[-1].close()
         return self.temp_files[-1].name

-    def find_last_issue(self):
-        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny')
-        #szukam zdjęcia i linka do porzedniego pełnego numeru
+    def find_last_issue(self, year):
+        soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
+        #szukam zdjęcia i linka do poprzedniego pełnego numeru
         first = True
         for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
             img = d.find('img')
             if img != None:
                 a = img.parent
                 self.EDITION = a['href']
-                self.title = img['alt']
                 self.cover_url = 'http://www.gosc.pl' + img['src']
-                if not first:
+                if year != date.today().year or not first:
                     break
                 first = False

     def parse_index(self):
-        self.find_last_issue()
+        year = date.today().year
+        self.find_last_issue(year)
+        ##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
+        if self.EDITION == 0:
+            self.find_last_issue(year-1)
         soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
         feeds = []
         #wstepniak

recipes/grid_to.recipe Normal file
View File

@@ -0,0 +1,79 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheGridTO(BasicNewsRecipe):
#: The title to use for the ebook
title = u'The Grid TO'
#: A couple of lines that describe the content this recipe downloads.
#: This will be used primarily in a GUI that presents a list of recipes.
description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
'accessible voice for Toronto.')
#: The author of this recipe
__author__ = u'Yusuf W'
#: The language that the news is in. Must be an ISO-639 code either
#: two or three characters long
language = 'en_CA'
#: Publication type
#: Set to newspaper, magazine or blog
publication_type = 'newspaper'
#: Convenient flag to disable loading of stylesheets for websites
#: that have overly complex stylesheets unsuitable for conversion
#: to ebooks formats
#: If True stylesheets are not downloaded and processed
no_stylesheets = True
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
remove_tags_before = dict(name='div', id='content')
remove_tags_after = dict(name='div', id='content')
remove_tags = [
dict(name='div', attrs={'class':'right-content pull-right'}),
dict(name='div', attrs={'class':'right-content'}),
dict(name='div', attrs={'class':'ftr-line'}),
dict(name='div', attrs={'class':'pull-right'}),
dict(name='div', id='comments'),
dict(name='div', id='tags')
]
#: Keep only the specified tags and their children.
#keep_only_tags = [dict(name='div', id='content')]
cover_margins = (0, 0, '#ffffff')
INDEX = 'http://www.thegridto.com'
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX)
cover_url = soup.find(attrs={'class':'article-block latest-issue'}).find('img')['src']
return cover_url
def parse_index(self):
# Get the latest issue
soup = self.index_to_soup(self.INDEX)
a = soup.find('div', attrs={'class': 'full-content stuff-ftr'}).findAll('a')[2]
# Parse the index of the latest issue
self.INDEX = self.INDEX + a['href']
soup = self.index_to_soup(self.INDEX)
feeds = []
for section in ['city', 'life', 'culture']:
section_class = 'left-content article-listing ' + section + ' pull-left'
div = soup.find(attrs={'class': section_class})
articles = []
for tag in div.findAllNext(attrs={'class':'search-block'}):
a = tag.findAll('a', href=True)[1]
title = self.tag_to_string(a)
url = a['href']
articles.append({'title': title, 'url': url, 'description':'', 'date':''})
feeds.append((section, articles))
return feeds
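
parse_index, used here and in several recipes below, must return a list of (section name, articles) tuples, where each article is a plain dict; only title and url are required, the rest may be empty strings. The expected shape, sketched:

feeds = [
    ('city', [
        {'title': 'Example article',                  # required
         'url': 'http://www.thegridto.com/example',   # required
         'description': '',                           # optional
         'date': ''},                                 # optional
    ]),
]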

View File

@@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336289226(BasicNewsRecipe):
title = u'Heavy Metal'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = False
masthead_url = 'http://net-static2.tccstatic.com/template/tmw/img/tj.gif'
feeds = [(u'Heavy Metal', u'http://www.heavy-metal.it/feed/')]
keep_only_tags = [
dict(name='div', attrs={'class':'entry'})
]
remove_tags_after = [
dict(name='div', attrs={'class':'sociable'})
]
description = 'An Italian heavy metal magazine'
__author__ = 'faber1971'
language = 'it'
__version__ = 'v1.0'
__date__ = '6, May 2012'

Binary file not shown. (new file, 1007 B)

recipes/icons/folha.png (new file, 1.6 KiB): binary file not shown.

Binary file not shown. (new file, 648 B)

View File

@@ -20,6 +20,8 @@ class JijiDotCom(BasicNewsRecipe):
     top_url = 'http://www.jiji.com/'

     feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
+    remove_tags_before = dict(id="article-area")
     remove_tags_after = dict(id="ad_google")

     def get_cover_url(self):

View File

@@ -0,0 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336504510(BasicNewsRecipe):
title = u'Juve - La Stampa'
oldest_article = 1
language = 'it'
max_articles_per_feed = 100
auto_cleanup = True
masthead_url = 'http://www3.lastampa.it/fileadmin/media/sport/quijuve/top_quijuve.jpg'
feeds = [(u'Qui Juve - La Stampa', u'http://feed43.com/2352784107537677.xml')]
remove_tags = [dict(name='div',attrs={'class':['article-toolbar', 'sezione sezione-news', 'intestazione']})]
extra_css = '''
div.dettaglio div.immagine_girata p.news-single-imgcaption {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
.sezione {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
body {color: #000000; font-family: "Georgia", "Times", serif; font-size: 7px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
h3 {color: #000000; font-family: "Georgia", "Times", serif; font-size: 22px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
div.dettaglio h2.catenaccio {color: #000000; font-family: "Georgia", "Times", serif; font-size: 18px; font-weight: 400;line-height: 1.2; padding-bottom: 12px; text-transform: none; }
'''
description = 'News about Juventus from La Stampa'
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '8, May 2012'

View File

@@ -1,7 +1,7 @@
 __license__ = 'GPL v3'
-__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini'
+__author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini; minor fixes by faber1971'
-__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>, Lorenzo Vigentini <l.vigentini at gmail.com>, faber1971'
-description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version'
+description = 'Italian daily newspaper - v1.02 (04, January 2010); 16.05.2010 new version; 17.10.2011 new version; 14.12.2011 new version; 11.05.2012 new version'

 '''
 http://www.repubblica.it/
@@ -12,14 +12,14 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class LaRepubblica(BasicNewsRecipe):
     title = 'La Repubblica'
-    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic'
+    __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic, faber1971'
     description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.'
     masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png'
     publisher = 'Gruppo editoriale L\'Espresso'
     category = 'News, politics, culture, economy, general interest'
     language = 'it'
     timefmt = '[%a, %d %b, %Y]'
-    oldest_article = 5
+    oldest_article = 1
     encoding = 'utf8'
     use_embedded_content = False
     no_stylesheets = True
@@ -59,6 +59,7 @@ class LaRepubblica(BasicNewsRecipe):
         dict(attrs={'class':'articolo'}),
         dict(attrs={'class':'body-text'}),
         dict(name='p', attrs={'class':'disclaimer clearfix'}),
+        dict(name='div', attrs={'id':'main'}),
         dict(attrs={'id':'contA'})
     ]

@@ -67,7 +68,7 @@ class LaRepubblica(BasicNewsRecipe):
         dict(name=['object','link','meta','iframe','embed']),
         dict(name='span',attrs={'class':'linkindice'}),
         dict(name='div', attrs={'class':['bottom-mobile','adv adv-middle-inline']}),
-        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head']}),
+        dict(name='div', attrs={'id':['rssdiv','blocco','fb-like-head', 'sidebar']}),
         dict(name='div', attrs={'class':['utility','fb-like-button','archive-button']}),
         dict(name='div', attrs={'class':'generalbox'}),
         dict(name='ul', attrs={'id':'hystory'})
@@ -88,11 +89,12 @@ class LaRepubblica(BasicNewsRecipe):
         (u'Sport', u'http://www.repubblica.it/rss/sport/rss2.0.xml'),
         (u'Calcio', u'http://www.repubblica.it/rss/sport/calcio/rss2.0.xml'),
         (u'Motori', u'http://www.repubblica.it/rss/motori/rss2.0.xml'),
-        (u'Edizione Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
-        (u'Edizione Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
+        (u'Roma', u'http://roma.repubblica.it/rss/rss2.0.xml'),
+        (u'Torino', u'http://torino.repubblica.it/rss/rss2.0.xml'),
+        (u'Milano', u'feed://milano.repubblica.it/rss/rss2.0.xml'),
+        (u'Napoli', u'feed://napoli.repubblica.it/rss/rss2.0.xml'),
+        (u'Bari', u'http://bari.repubblica.it/rss/rss2.0.xml'),
+        (u'Palermo', u'feed://palermo.repubblica.it/rss/rss2.0.xml')
     ]

     def preprocess_html(self, soup):

View File

@@ -16,12 +16,12 @@ class MainichiDailyNews(BasicNewsRecipe):
     publisher = 'Mainichi Daily News'
     category = 'news, japan'
     language = 'ja'
+    index = 'http://mainichi.jp/select/'

-    feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
+    remove_javascript = True
+    masthead_title = u'MAINICHI DAILY NEWS'

     remove_tags_before = {'class':"NewsTitle"}
-    remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
+    remove_tags_after = {'class':"NewsBody clr"}

     def parse_feeds(self):
@@ -32,9 +32,30 @@ class MainichiDailyNews(BasicNewsRecipe):
             for a,curarticle in enumerate(curfeed.articles):
                 if re.search(r'pheedo.jp', curarticle.url):
                     delList.append(curarticle)
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
             if len(delList)>0:
                 for d in delList:
                     index = curfeed.articles.index(d)
                     curfeed.articles[index:index+1] = []

         return feeds

+    def parse_index(self):
+        feeds = []
+        soup = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'MaiLink'})
+        if topstories:
+            newsarticles = []
+            for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema:
+                    newsarticles.append({
+                        'title'       :itema.string
+                        ,'date'       :''
+                        ,'url'        :itema['href']
+                        ,'description':''
+                    })
+            feeds.append(('latest', newsarticles))
+        return feeds
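
The delList bookkeeping in parse_feeds (collect the ad articles, then splice each out by index) can be collapsed into a single filtering pass. An equivalent sketch, assuming each feed's articles attribute can simply be reassigned:

def parse_feeds(self):
    feeds = BasicNewsRecipe.parse_feeds(self)
    for curfeed in feeds:
        # keep only articles whose URL is not an ad/tracking redirect
        curfeed.articles = [a for a in curfeed.articles
                            if not re.search(r'(pheedo|rssad)\.jp', a.url)]
    return feeds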

View File

@@ -0,0 +1,67 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiEnglishNews(BasicNewsRecipe):
title = u'The Mainichi'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 40
description = 'Japanese traditional newspaper Mainichi news in English'
publisher = 'Mainichi News'
category = 'news, japan'
language = 'en_JP'
index = 'http://mainichi.jp/english/english/index.html'
remove_javascript = True
masthead_url = 'http://mainichi.jp/english/images/themainichi.png'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
for section in soup.findAll('section'):
newsarticles = []
section_name = 'news'
hds = section.find('div', attrs={'class':'CategoryHead clr'})
if hds:
section_item = hds.find('h1')
if section_item:
section_name = section_item.find('a').string
items = section.find('ul', attrs={'class':'MaiLink'})
for item in items.findAll('li'):
if item:
itema = item.find('a')
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append((section_name, newsarticles))
return feeds

View File

@@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class MainichiDailyITNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 100
description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
publisher = 'Mainichi Daily News'
category = 'news, Japan, IT, Electronics'
language = 'ja'
feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'pheedo.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds

View File

@@ -0,0 +1,59 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.mainichi.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyScienceNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(Science)'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 20
description = 'Japanese traditional newspaper Mainichi Daily News - science'
publisher = 'Mainichi Daily News'
category = 'news, japan'
language = 'ja'
index = 'http://mainichi.jp/select/science'
remove_javascript = True
masthead_title = u'MAINICHI DAILY NEWS'
remove_tags_before = {'class':"NewsTitle"}
remove_tags_after = {'class':"NewsBody clr"}
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.index)
topstories = soup.find('ul',attrs={'class':'MaiLink'})
if topstories:
newsarticles = []
for itt in topstories.findAll('li'):
itema = itt.find('a',href=True)
if itema:
newsarticles.append({
'title' :itema.string
,'date' :''
,'url' :itema['href']
,'description':''
})
feeds.append(('Science', newsarticles))
return feeds

View File

@@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MarineCorpsTimes(BasicNewsRecipe):
title = 'Marine Corps Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Marine Corps'
language = 'en'
publisher = 'MarineCorpsTimes.com'
category = 'news, U.S. Marine Corps'
tags = 'news, U.S. Marine Corps'
cover_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
masthead_url = 'http://www.marinecorpstimes.com/images/logo_marinetimes-alert.jpg'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.MarineCorpstimes.com/rss_news.php'),
('Benefits', 'http://www.MarineCorpstimes.com/rss_benefits.php'),
('Money', 'http://www.MarineCorpstimes.com/rss_money.php'),
('Careers & Education', 'http://www.MarineCorpstimes.com/rss_careers.php'),
('Community', 'http://www.MarineCorpstimes.com/rss_community.php'),
('Off Duty', 'http://www.MarineCorpstimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.MarineCorpstimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.MarineCorpstimes.com/rss_guard.php'),
]

View File

@@ -56,7 +56,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     encoding = 'utf-8'
     remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href']
     use_embedded_content = False
-    extra_css = 'body{font-size:1em;padding:5px 0}body,a,h2{background-color:#fff;text-decoration:none;color:#000}#date,div.byline,p.article-image-caption .credits,.calibrenavbar{font-size:.5em}.article-box-fact.module-title,#date,div.byline{clear:both}.article-box-fact.module-title{margin:8px 0}.article-box-fact.module-title,h2{font-size:1.1em}h1.title{font-size:1.4em}h1.title,.article-body p,div.article-image-caption-2column,div.article-image-caption-3column,#date,div.byline{margin-bottom:.6em}div.article-box-fact div.subtitle,.article-box-fact.module-title,h1.title,p.article-image-caption{font-weight:700}div.column-1-3{margin-left:19px}div.column-1-2{display:inline}div.column-1-2,div.column-1-3{margin-right:7px}p.article-image-caption{font-size:.6em;margin-top:5px}p.article-image-caption,#date,div.byline{color:#616262}p.article-image-caption .credits{font-style:italic}div.article-image-caption{width:246px}div.article-image-caption-2column{width:373px}div.column-3{background-color:#eee;float:right;width:50%}div.column-3 module-title{border:1px solid #aaa}div.article-box-fact div.subtitle,.article-box-fact.module-title{color:#24763b}div.byline{border-top:2px solid #24763b}div.column-3,img,div.column-3,p.small,div.article-image-caption{margin:.5em}img,p.small,.column1,h2{border:0;padding:0}.column1,h1,h2{margin:0}'
+    extra_css = 'body{font-size:1em;padding:5px 0}body,a,h2{background-color:#fff;text-decoration:none;color:#000}#date,div.byline,p.article-image-caption .credits,.calibrenavbar,.calibre5{font-size:.5em}.article-box-fact.module-title,#date,div.byline{clear:both}.article-box-fact{font-size:0.7em}.article-box-fact.module-title{margin:8px 0; font-size:0.8em}h2{font-size:1em}h1.title{font-size:1.4em}h1.title,.article-body p,div.article-image-caption-2column,div.article-image-caption-3column,#date,div.byline{margin-bottom:.6em}div.article-box-fact div.subtitle,.article-box-fact.module-title,h1.title,p.article-image-caption{font-weight:700}div.column-1-3{margin-left:19px}div.column-1-2{display:inline}div.column-1-2,div.column-1-3{margin-right:7px}p.article-image-caption{font-size:.6em;margin-top:5px}p.article-image-caption,#date,div.byline{color:#616262}p.article-image-caption .credits{font-style:italic}div.article-image-caption{width:246px}div.article-image-caption-2column{width:373px}div.column-3{background-color:#eee;float:right;width:50%}div.column-3 module-title{border:1px solid #aaa}div.article-box-fact div.subtitle,.article-box-fact.module-title{color:#24763b}div.byline{border-top:2px solid #24763b}div.column-3,img,div.column-3,p.small,div.article-image-caption{margin:.5em}img,p.small,.column1,h2,.calibre5,.calibrenavbar{border:0;padding:0}.column1,h1,h2,.calibrenavbar{margin:0}'

     preprocess_regexps = [
@@ -71,11 +71,11 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     remove_tags = [
         dict(name=['iframe','script','noscript','style']),
-        dict(name='div', attrs={'class':[re.compile('column-[14]-5'),'col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)')]}),
+        dict(name='div', attrs={'class':['column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
-        dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'sidebar',re.compile('^article-\d'),'comments','gallery-1']),
+        dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1']),
         dict(name='a', attrs={'name':'comments'}),
         #dict(name='div', attrs={'data-href'}),
-        dict(name='img', attrs={'class':'top-line'}),
+        dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}),
         dict(attrs={'style':re.compile('^(.*(display\s?:\s?none|img-mask|white)\s?;?.*)$'),'title':'volledig scherm'})]

     '''removed by before/after:

View File

@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MilitaryTimes(BasicNewsRecipe):
title = 'Military Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Military'
language = 'en'
publisher = 'MilitaryTimes.com'
category = 'news, U.S. Military'
tags = 'news, U.S. Military'
cover_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
masthead_url = 'http://www.militarytimes.com/images/logo_militarytimes_landing-s.gif'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.militarytimes.com/rss_news.php'),
('Benefits', 'http://www.militarytimes.com/rss_benefits.php'),
('Money', 'http://www.militarytimes.com/rss_money.php'),
('Careers & Education', 'http://www.militarytimes.com/rss_careers.php'),
('Community', 'http://www.militarytimes.com/rss_community.php'),
('Off Duty', 'http://www.militarytimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.militarytimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.militarytimes.com/rss_guard.php'),
]
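The feeds list above is the whole contract for this recipe: BasicNewsRecipe fetches each (section title, RSS URL) pair and, with auto_cleanup enabled, extracts the article bodies itself. A quick way to smoke-test a new recipe like this from the command line (a suggested workflow, not part of this change) is:

    ebook-convert military_times.recipe out.epub --test -vv

where --test restricts the download to two feeds and two articles per feed, so a dead feed URL shows up in seconds.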

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
@ -7,77 +6,21 @@ __license__ = 'GPL v3'
www.canada.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
# un-comment the following three lines for the Montreal Gazette
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
# un-comment the following four lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
# un-comment the following four lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
title = u'Montreal Gazette'
url_prefix = 'http://www.montrealgazette.com'
description = u'News from Montreal, QC'
fp_tag = 'CAN_MG'
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
auto_cleanup = True
auto_cleanup_keep = '//*[@id="imageBox"]'
timefmt = ' [%b %d]'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
@ -87,135 +30,19 @@ class CanWestPaper(BasicNewsRecipe):
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self, soup):
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
feeds = [
('News',
'http://rss.canada.com/get/?F297'),
('Sports',
'http://rss.canada.com/get/?F299'),
('Entertainment',
'http://rss.canada.com/get/?F7366'),
('Business',
'http://rss.canada.com/get/?F6939'),
]
articles = {}
key = 'News'
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -0,0 +1,22 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Nachdenkseiten(BasicNewsRecipe):
title = u'Nachdenkseiten'
__author__ = 'jrda'
publisher = 'www.nachdenkseiten.de Albrecht Mueller und Dr. Wolfgang Lieb'
description = 'NachDenkSeiten - Die kritische Website'
category = 'news'
oldest_article = 7
use_embedded_content = False
language = 'de'
timefmt = ''
max_articles_per_feed = 6
no_stylesheets = True
encoding = 'utf-8'
remove_javascript = True
keep_only_tags = [
{'id':'content'}]
feeds = [
('News', 'http://www.nachdenkseiten.de/?feed=rss2'),
]

View File

@ -0,0 +1,21 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
title = u'National Geographic'
publisher = u'National Geographic'
__author__ = 'Vakya'
description = 'Revista National Geographic - Últimas noticias'
language = 'es'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = True
remove_tags_before = dict(name='p' , attrs={'class':['image']})
remove_tags_after = dict(name='hr')
feeds = [(u'Vida salvage', u'http://www.nationalgeographic.com.es/feeds/rss.html')]

View File

@ -0,0 +1,16 @@
__version__ = 'v1.0'
__date__ = '5, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
title = u'National Geographic'
__author__ = 'faber1971'
description = 'Science magazine'
language = 'it'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = True
remove_tags = [dict(name='div',attrs={'class':'banner-abbonamenti'})]
feeds = [(u'National Geographic', u'http://www.nationalgeographic.it/rss/all/rss2.0.xml')]

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class NYTimes(BasicNewsRecipe):
@ -11,22 +10,8 @@ class NYTimes(BasicNewsRecipe):
needs_subscription = False
no_stylesheets = True
#remove_tags_before = dict(name='h1', attrs={'class':'heading'})
auto_cleanup = True
remove_tags_after = dict(name='div', attrs={'class':'npStoryTools npWidth1-6 npRight npTxtStrong'})
auto_cleanup_keep = '//*[@class="npStoryPhoto npTxtPlain"]'
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':['story-tools', 'npStoryTools npWidth1-6 npRight npTxtStrong']}),
#dict(name='div', attrs={'id':['qrformdiv', 'inSection', 'alpha-inner']}),
#dict(name='form', attrs={'onsubmit':''}),
dict(name='ul', attrs={'class':'npTxtAlt npGroup npTxtCentre npStoryShare npTxtStrong npTxtDim'}),
]
# def preprocess_html(self, soup):
# table = soup.find('table')
# if table is not None:
# table.extract()
# return soup
#TO GET ARTICLE TOC
@ -53,14 +38,14 @@ class NYTimes(BasicNewsRecipe):
if current_section is not None and x.name == 'h5':
# Article found
title = self.tag_to_string(x)
a = x.find('a', href=lambda x: x and 'story' in x)
a = x.find('a', href=True)
if a is None:
continue
url = a.get('href', False)
if not url or not title:
continue
#if url.startswith('story'):
url = 'http://www.nationalpost.com/todays-paper/'+url
#url = 'http://www.nationalpost.com/todays-paper/'+url
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
current_articles.append({'title': title, 'url':url,
@ -70,11 +55,4 @@ class NYTimes(BasicNewsRecipe):
feeds.append((current_section, current_articles))
return feeds
def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'id':'npContentMain'})
##td = heading.findParent(name='td')
##td.extract()
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body')
body.insert(0, story)
return soup

recipes/navy_times.recipe Normal file
View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NavyTimes(BasicNewsRecipe):
title = 'Navy Times'
__author__ = 'jde'
__date__ = '16 May 2012'
__version__ = '1.0'
description = 'News of the U.S. Navy'
language = 'en'
publisher = 'NavyTimes.com'
category = 'news, U.S. Navy'
tags = 'news, U.S. Navy'
cover_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
masthead_url = 'http://www.navytimes.com/images/logo_navytimes_alert.jpg'
oldest_article = 7 #days
max_articles_per_feed = 25
publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = True
feeds = [
('News', 'http://www.navytimes.com/rss_news.php'),
('Benefits', 'http://www.navytimes.com/rss_benefits.php'),
('Money', 'http://www.navytimes.com/rss_money.php'),
('Careers & Education', 'http://www.navytimes.com/rss_careers.php'),
('Community', 'http://www.navytimes.com/rss_community.php'),
('Off Duty', 'http://www.navytimes.com/rss_off_duty.php'),
('Entertainment', 'http://www.navytimes.com/rss_entertainment.php'),
('Guard & Reserve', 'http://www.navytimes.com/rss_guard.php'),
]

View File

@ -0,0 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NewsBusters(BasicNewsRecipe):
title = u'News Busters'
description = 'Exposing and Combating Liberal Media Bias'
__author__ = 'jde'
oldest_article = 1  # day
max_articles_per_feed = 100
cover_url = "http://newsbusters.org/sites/all/themes/genesis_nb/images/nb-mrc.png"
language = 'en'
encoding = 'utf8'
needs_subscription = False
remove_javascript = True
recursions = 0
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [(u'Blog', u'http://www.newsbusters.org/rss.xml')]

View File

@ -102,7 +102,7 @@ class Newsweek(BasicNewsRecipe):
if len(options) > self.BACK_ISSUES:
option = options[self.BACK_ISSUES];
self.EDITION = option['value'].replace('http://www.newsweek.pl/wydania/','')
issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
else:
self.BACK_ISSUES = self.BACK_ISSUES - len(options)
self.YEAR = self.YEAR - 1

View File

@ -9,10 +9,10 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe):
title = 'Peščanik'
title = u'Peščanik'
__author__ = 'Darko Miletic'
description = 'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
description = u'Peščanik je udruženje građana osnovano 2006. godine. Glavni proizvod Peščanika je radio emisija koja je emitovana na Radiju B92 od 02.02.2000. do 16.06.2011, a od septembra 2011. se emituje na osam radio stanica u Srbiji, Crnoj Gori i BiH'
publisher = 'Peščanik'
publisher = u'Peščanik'
category = 'news, politics, Serbia'
oldest_article = 10
max_articles_per_feed = 100

View File

@ -1,5 +1,5 @@
""" """
Pocket Calibre Recipe v1.0 Pocket Calibre Recipe v1.2
""" """
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = ''' __copyright__ = '''
@ -73,6 +73,9 @@ class Pocket(BasicNewsRecipe):
articles = [] articles = []
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
ritem = soup.find('ul', attrs={'id':'list'}) ritem = soup.find('ul', attrs={'id':'list'})
if ritem is None:
self.log.exception("Page %s skipped: invalid HTML" % (feedtitle if feedtitle else feedurl))
continue
for item in reversed(ritem.findAll('li')): for item in reversed(ritem.findAll('li')):
if articlesToGrab < 1: if articlesToGrab < 1:
break break
@ -94,7 +97,12 @@ class Pocket(BasicNewsRecipe):
self.readList.append(readLink) self.readList.append(readLink)
totalfeeds.append((feedtitle, articles)) totalfeeds.append((feedtitle, articles))
if len(self.readList) < self.minimum_articles: if len(self.readList) < self.minimum_articles:
raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") self.mark_as_read_after_dl = False
if hasattr(self, 'abort_recipe_processing'):
self.abort_recipe_processing("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
else:
self.log.exception("Only %d articles retrieved, minimum_articles not reached" % len(self.readList))
return []
return totalfeeds return totalfeeds
def mark_as_read(self, markList): def mark_as_read(self, markList):
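The minimum_articles handling above is a feature-detection pattern: abort_recipe_processing only exists in newer calibre releases, so the recipe probes for it and degrades to logging plus an empty feed list on older installs. In isolation (names exactly as in the diff) the pattern is:

    msg = "Only %d articles retrieved, minimum_articles not reached" % len(self.readList)
    if hasattr(self, 'abort_recipe_processing'):
        self.abort_recipe_processing(msg)  # newer calibre: abort the download cleanly
    else:
        self.log.exception(msg)            # older calibre: log and return no feeds
        return []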

recipes/rebelion.recipe Normal file
View File

@ -0,0 +1,34 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
import re
class RebelionRecipe (BasicNewsRecipe):
__author__ = u'Marc Busqué <marc@lamarciana.com>' #Thanks to atlantique http://www.mobileread.com/forums/member.php?u=67876
__url__ = 'http://www.lamarciana.com'
__version__ = '1.0'
__license__ = 'GPL v3'
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
title = u'Rebelion.org'
description = u'Rebelión pretende ser un medio de información alternativa que publique las noticias que no son consideradas importantes por los medios de comunicación tradicionales. También, dar a las noticias un tratamiento diferente en la línea de mostrar los intereses que los poderes económicos y políticos del mundo capitalista ocultan para mantener sus privilegios y el status actual. Queremos servir y ayudarnos de todos los grupos, colectivos y personas que trabajan por cambiar este mundo en una perspectiva radicalmente diferente, más justa, igualitaria y equilibrada social y ecológicamente. Es nuestro objetivo contar con la participación y colaboración de todos vosotros para que Rebelión sea un espacio serio, riguroso y actualizado en la difusión de noticias.'
url = 'http://www.rebelion.org'
language = 'es'
tags = 'contrainformación, información alternativa'
oldest_article = 1
remove_empty_feeds = True
encoding = 'latin1' #
keep_only_tags = [
{'name': 'div', 'attrs': {'id': 'CuerpoNoticia'}}
]
no_stylesheets = True
extra_css = '.autor {font-style: italic;} .titulo {font-size: 150%;} .titulo, .pretitulo {text-align: center;} #TextoNoticia {text-align:justify;} .autor, .fuente, .entradilla {font-size: 90%; text-align: left;}'
feeds = [
(u'Titulares del día', u'http://www.rebelion.org/rss_portada.php'),
]
#See http://www.mobileread.com/forums/showthread.php?t=174501
def print_version(self, url):
id = re.compile('\d*$').search(url).group()
return u'http://www.rebelion.org/noticia.php?id=%s' % id
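print_version above maps every feed link to Rebelión's print view by grabbing the run of digits at the end of the URL. A minimal illustration, with a made-up article id:

    import re
    url = 'http://www.rebelion.org/noticia.php?id=149829'  # hypothetical feed URL ending in the id
    id = re.compile('\d*$').search(url).group()            # -> '149829'
    print u'http://www.rebelion.org/noticia.php?id=%s' % id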

View File

@ -0,0 +1,22 @@
__license__ = 'GPL v3'
__author__ = 'Vakya'
__version__ = 'v1.0'
__date__ = '14, May 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1336226255(BasicNewsRecipe):
title = u'Revista Summa'
publisher = u'Summa'
__author__ = 'Vakya'
description = 'Informacion regional sobre economia y negocios'
language = 'es'
oldest_article = 15
max_articles_per_feed = 100
auto_cleanup = True
remove_tags_before = dict(name='h1')
remove_tags_after = dict(name='label')
feeds = [(u'Revista Summa', u'http://www.revistasumma.com/rss/rss-v2.0.rss')]

recipes/shortlist.recipe Normal file
View File

@ -0,0 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324663493(BasicNewsRecipe):
title = u'Shortlist'
description = 'Articles From Shortlist.com'
# I've set oldest article to 7 days as the website updates weekly
oldest_article = 7
max_articles_per_feed = 12
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
__author__ = 'Dave Asbury'
# last updated 19/5/12
language = 'en_GB'
def get_cover_url(self):
soup = self.index_to_soup('http://www.shortlist.com')
cov = soup.find(attrs={'width' : '121'})
#print '******** ',cov,' ***'
#cover_url = 'http://www.shortlist.com'+cov['src']
cover_url =cov['src']
return cover_url
masthead_url = 'http://www.mediauk.com/logos/100/344096.png'
preprocess_regexps = [
(re.compile(r'…or.*?email to your friends</a>.', re.IGNORECASE | re.DOTALL), lambda match: '')]
keep_only_tags = [
#dict(name='h1'),
dict(name='h2',attrs={'class' : 'title'}),
dict(name='h3',attrs={'class' : 'subheading'}),
dict(attrs={'class' : [ 'hero-static','stand-first']}),
dict(attrs={'class' : 'hero-image'}),
dict(name='div',attrs={'id' : ['list','article','article alternate']}),
dict(name='div',attrs={'class' : 'stand-first'}),
]
remove_tags = [dict(name='h2',attrs={'class' : 'graphic-header'}),
dict(attrs={'id' : ['share','twitter','facebook','digg','delicious','facebook-like']}),
dict(attrs={'class' : ['related-content','related-content-item','related-content horizontal','more']}),
]
remove_tags_after = [dict(name='p',attrs={'id' : 'tags'})
]
feeds = [
(u'Home carousel',u'http://feed43.com/7106317222455380.xml'),
(u'This Weeks Issue', u'http://feed43.com/0323588208751786.xml'),
(u'Cool Stuff',u'http://feed43.com/6253845228768456.xml'),
(u'Style',u'http://feed43.com/7217107577215678.xml'),
(u'Films',u'http://feed43.com/3101308515277265.xml'),
(u'Music',u'http://feed43.com/2416400550560162.xml'),
(u'TV',u'http://feed43.com/4781172470717123.xml'),
(u'Sport',u'http://feed43.com/5303151885853308.xml'),
(u'Gaming',u'http://feed43.com/8883764600355347.xml'),
(u'Women',u'http://feed43.com/2648221746514241.xml'),
(u'Instant Improver', u'http://feed43.com/1236541026275417.xml'),
#(u'Articles', u'http://feed43.com/3428534448355545.xml')
]

View File

@ -1,3 +1,4 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
@ -15,6 +16,8 @@ class Spiegel_int(BasicNewsRecipe):
language = 'en_DE'
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
encoding = 'cp1252'
publisher = 'SPIEGEL ONLINE GmbH'
category = 'news, politics, Germany'
@ -43,25 +46,25 @@ class Spiegel_int(BasicNewsRecipe):
.spPhotoGallery{font-size:x-small; color:#990000 ;}
'''
keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
#keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
remove_tags_after = dict(attrs={'id':'spArticleBody'})
#remove_tags_after = dict(attrs={'id':'spArticleBody'})
remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
#remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
remove_attributes = ['clear']
#remove_attributes = ['clear']
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]
def print_version(self, url):
#def print_version(self, url):
main, sep, rest = url.rpartition(',')
#main, sep, rest = url.rpartition(',')
rmain, rsep, rrest = main.rpartition(',')
#rmain, rsep, rrest = main.rpartition(',')
return rmain + ',druck-' + rrest + ',' + rest
#return rmain + ',druck-' + rrest + ',' + rest
def preprocess_html(self, soup):
#def preprocess_html(self, soup):
for item in soup.findAll(style=True):
#for item in soup.findAll(style=True):
del item['style']
#del item['style']
for item in soup.findAll('a'):
#for item in soup.findAll('a'):
if item.string is not None:
#if item.string is not None:
str = item.string
#str = item.string
item.replaceWith(str)
#item.replaceWith(str)
else:
#else:
str = self.tag_to_string(item)
#str = self.tag_to_string(item)
item.replaceWith(str)
#item.replaceWith(str)
return soup
#return soup

View File

@ -6,7 +6,6 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
spiegel.de
'''
from time import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_ger(BasicNewsRecipe):
@ -21,6 +20,8 @@ class Spiegel_ger(BasicNewsRecipe):
lang = 'de-DE'
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
auto_cleanup_keep = '//*[@id="spArticleTopAsset"]'
encoding = 'cp1252'
conversion_options = {
@ -31,20 +32,9 @@ class Spiegel_ger(BasicNewsRecipe):
}
keep_only_tags = [dict(name='div', attrs={'id':'spArticleContent'})]
remove_tags = [dict(name=['object','link','base','iframe'])]
remove_tags_after = dict(name='div', attrs={'id':'spArticleBody'})
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/index.rss')]
def print_version(self, url):
rmt = url.rpartition('#')[0]
main, sep, rest = rmt.rpartition(',')
rmain, rsep, rrest = main.rpartition(',')
purl = rmain + ',druck-' + rrest + ',' + rest
return purl
def get_cover_url(self):
return 'http://wissen.spiegel.de/wissen/titel/SP/' + strftime("%Y/%W/%j/titel.jpg")

View File

@ -0,0 +1,39 @@
''' Stars and Stripes
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1308791026(BasicNewsRecipe):
title = u'Stars and Stripes'
oldest_article = 3
max_articles_per_feed = 100
__author__ = 'adoucette'
description = "The U.S. military's independent news source, featuring exclusive reports from Iraq, Afghanistan, Europe and the Far East."
no_stylesheets = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'stripes.com'
category = 'news, US, world'
language = 'en'
publication_type = 'newsportal'
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': True
}
keep_only_tags = [dict(name='div', attrs={'class':['element article']})]
remove_tags_after = [dict(name='ul', attrs={'class':'inline-bookmarks'})]
feeds = [
(u'News', u'http://feeds.stripes.com/starsandstripes/news'),
(u'Sports', u'http://feeds.stripes.com/starsandstripes/sports'),
(u'Military Life', u'http://feeds.stripes.com/starsandstripes/militarylife'),
(u'Opinion', u'http://feeds.stripes.com/starsandstripes/opinion'),
(u'Travel', u'http://feeds.stripes.com/starsandstripes/travel')
]

View File

@ -0,0 +1,92 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.strategic-culture.org
'''
import time
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
class StrategicCulture(BasicNewsRecipe):
title = 'Strategic Culture Foundation'
__author__ = 'Darko Miletic'
description = 'Online Journal'
publisher = 'Strategic Culture Foundation'
category = 'news, politics'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
publication_type = 'newsportal'
masthead_url = 'http://www.strategic-culture.org/img/logo.jpg'
extra_css = '''
body{font-family: Arial, sans-serif}
h1{font-family: "Times New Roman",Times,serif}
img{margin-bottom: 0.8em}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [
dict(name=['h1','p'])
,dict(name='div', attrs={'id':'cke_pastebin'})
]
remove_tags = [dict(name=['object','link','base','meta','iframe'])]
feeds = [
(u'News' , u'http://www.strategic-culture.org/blocks/news.html' )
,(u'Politics' , u'http://www.strategic-culture.org/rubrics/politics.html' )
,(u'Economics' , u'http://www.strategic-culture.org/rubrics/economics.html' )
,(u'History & Culture', u'http://www.strategic-culture.org/rubrics/history-and-culture.html')
,(u'Columnists' , u'http://www.strategic-culture.org/rubrics/columnists.html' )
]
def print_version(self, url):
return url.replace('-culture.org/news/','-culture.org/pview/')
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
if feedurl.endswith('news.html'):
clname = 'sini14'
else:
clname = 'h22'
checker = []
for item in soup.findAll('a', attrs={'class':clname}):
atag = item
url = atag['href']
title = self.tag_to_string(atag)
description = ''
daypart = url.rpartition('/')[0]
mpart,sep,day = daypart.rpartition('/')
ypart,sep,month = mpart.rpartition('/')
year = ypart.rpartition('/')[2]
date = strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(day + "/" + month + "/" + year, "%d/%m/%Y"))
if url not in checker:
checker.append(url)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
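Since the listing pages carry no usable timestamps, parse_index reconstructs each article's date from the URL path. For a hypothetical link such as http://www.strategic-culture.org/news/2012/05/18/some-article.html, the chain of rpartition('/') calls peels the path apart like this:

    url = 'http://www.strategic-culture.org/news/2012/05/18/some-article.html'  # hypothetical
    daypart = url.rpartition('/')[0]           # ...news/2012/05/18
    mpart, sep, day = daypart.rpartition('/')  # day   = '18'
    ypart, sep, month = mpart.rpartition('/')  # month = '05'
    year = ypart.rpartition('/')[2]            # year  = '2012'
    # strftime(...) then renders 'Fri, 18 May 2012 00:00:00 +0000'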

View File

@ -1,8 +1,9 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2012, mkydgr'
'''
www.wired.com
based on the (broken) built-in recipe by Darko Miletic <darko.miletic at gmail.com>
'''
import re
@ -11,11 +12,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Wired(BasicNewsRecipe):
title = 'Wired Magazine'
__author__ = 'Darko Miletic'
__author__ = 'mkydgr'
description = 'Gaming news'
description = 'Technology News'
publisher = 'Conde Nast Digital'
category = 'news, games, IT, gadgets'
category = ''
oldest_article = 32
oldest_article = 500
delay = 1
max_articles_per_feed = 100
no_stylesheets = True
@ -25,7 +26,8 @@ class Wired(BasicNewsRecipe):
language = 'en'
publication_type = 'magazine'
extra_css = ' body{font-family: Arial,Verdana,sans-serif} .entryDescription li {display: inline; list-style-type: none} '
index = 'http://www.wired.com/magazine/'
index = 'http://www.wired.com/magazine'
departments = ['features','start','test','play','found', 'reviews']
preprocess_regexps = [(re.compile(r'<meta name="Title".*<title>', re.DOTALL|re.IGNORECASE),lambda match: '<title>')]
conversion_options = {
@ -38,80 +40,53 @@ class Wired(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
remove_tags_after = dict(name='div', attrs={'class':'tweetmeme_button'})
remove_tags = [
dict(name=['object','embed','iframe','link','meta','base'])
dict(name=['object','embed','iframe','link'])
,dict(name='div', attrs={'class':['podcast_storyboard','tweetmeme_button']})
,dict(attrs={'id':'ff_bottom_nav'})
,dict(name='a',attrs={'href':'http://www.wired.com/app'})
]
remove_attributes = ['height','width','lang','border','clear']
remove_attributes = ['height','width']
def parse_index(self):
totalfeeds = []
soup = self.index_to_soup(self.index)
majorf = soup.find('div',attrs={'class':'index'})
if majorf:
pfarticles = []
firsta = majorf.find(attrs={'class':'spread-header'})
if firsta:
pfarticles.append({
'title' :self.tag_to_string(firsta.a)
,'date' :strftime(self.timefmt)
,'url' :'http://www.wired.com' + firsta.a['href']
,'description':''
})
for itt in majorf.findAll('li'):
itema = itt.find('a',href=True)
if itema:
pfarticles.append({
'title' :self.tag_to_string(itema)
,'date' :strftime(self.timefmt)
,'url' :'http://www.wired.com' + itema['href']
,'description':''
})
totalfeeds.append(('Cover', pfarticles))
features = soup.find('div',attrs={'id':'my-glider'})
if features:
farticles = []
for item in features.findAll('div',attrs={'class':'section'}):
divurl = item.find('div',attrs={'class':'feature-header'})
if divurl:
divdesc = item.find('div',attrs={'class':'feature-text'})
url = divurl.a['href']
if not divurl.a['href'].startswith('http://www.wired.com'):
url = 'http://www.wired.com' + divurl.a['href']
title = self.tag_to_string(divurl.a)
description = self.tag_to_string(divdesc)
date = strftime(self.timefmt)
farticles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append(('Featured Articles', farticles))
#department feeds
departments = ['rants','start','test','play','found']
depts = soup.find('div',attrs={'id':'department-posts'})
dept = soup.find('div',attrs={'id':'magazine-departments'})
if dept:
if depts:
for ditem in departments:
for ditem in self.departments:
darticles = []
department = dept.find('div',attrs={'id':'department-'+ditem})
department = depts.find('h3',attrs={'id':'department-'+ditem})
if department:
for item in department.findAll('div'):
#print '\n###### Found department %s ########'%(ditem)
description = ''
feed_link = item.find('a')
el = department.next
while el and (el.__class__.__name__ == 'NavigableString' or el.name != 'h3'):
if el.__class__.__name__ != 'NavigableString':
#print '\t ... element',el.name
if el.name == 'ul':
for artitem in el.findAll('li'):
#print '\t\t ... article',repr(artitem)
feed_link = artitem.find('a')
#print '\t\t\t ... link',repr(feed_link)
if feed_link and feed_link.has_key('href'):
url = feed_link['href']
url = self.makeurl(feed_link['href'])
title = self.tag_to_string(feed_link)
date = strftime(self.timefmt)
#print '\t\t ... found "%s" %s'%(title,url)
darticles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
,'description':''
})
el = None
else:
el = el.next
totalfeeds.append((ditem.capitalize(), darticles))
return totalfeeds
@ -120,7 +95,7 @@ class Wired(BasicNewsRecipe):
soup = self.index_to_soup(self.index)
cover_item = soup.find('div',attrs={'class':'spread-image'})
if cover_item:
cover_url = 'http://www.wired.com' + cover_item.a.img['src']
cover_url = self.makeurl(cover_item.a.img['src'])
return cover_url
def print_version(self, url):
@ -129,17 +104,10 @@ class Wired(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
if item.string is not None:
tstr = item.string
item.replaceWith(tstr)
else:
item.name='span'
for atrs in ['href','target','alt','title','name','id']:
if item.has_key(atrs):
del item[atrs]
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
def makeurl(self, addr):
if addr[:4] != 'http' : addr='http://www.wired.com' + addr
while addr[-2:] == '//' : addr=addr[:-1]
return addr
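makeurl centralizes the URL cleanup the old recipe did inline: host-relative addresses get the site prefix, and a trailing '//' is collapsed to '/'. Illustrative values only:

    self.makeurl('/magazine/2012/05')       # -> 'http://www.wired.com/magazine/2012/05'
    self.makeurl('http://www.wired.com//')  # -> 'http://www.wired.com/'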

Binary file not shown.

View File

@ -490,12 +490,6 @@ save_original_format = True
# how many should be shown, here.
gui_view_history_size = 15
#: When using the 'Tweak Book' action, which format to prefer
# When tweaking a book that has multiple formats, calibre picks one
# automatically. By default EPUB is preferred to HTMLZ. If you would like to
# prefer HTMLZ to EPUB for tweaking, change this to 'htmlz'
tweak_book_prefer = 'epub'
#: Change the font size of book details in the interface
# Change the font size at which book details are rendered in the side panel and
# comments are rendered in the metadata edit dialog. Set it to a positive or
@ -512,3 +506,17 @@ change_book_details_font_size_by = 0
# No compile: compile_gpm_templates = False
compile_gpm_templates = True
#: What format to default to when using the Tweak feature
# The Tweak feature of calibre allows direct editing of a book format.
# If multiple formats are available, calibre will offer you a choice
# of formats, defaulting to your preferred output format if it is available.
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
# to that format rather than your output format preference.
# Set to a value of 'remember' to use whichever format you chose last time you
# used the Tweak feature.
# Examples:
# default_tweak_format = None (Use output format)
# default_tweak_format = 'EPUB'
# default_tweak_format = 'remember'
default_tweak_format = None

View File

@ -1,30 +1,17 @@
" Project wide builtins " Project wide builtins
let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext" let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext"
python << EOFPY fun! CalibreLog()
import os, sys " Setup buffers to edit the calibre changelog and version info prior to
" making a release.
enew
read ! bzr log -l 500
set nomodifiable noswapfile buftype=nofile
edit Changelog.yaml
edit src/calibre/constants.py
endfun
import vipy nnoremap \log :call CalibreLog()<CR>
source_file = vipy.vipy.eval('expand("<sfile>")') python import init_calibre
project_dir = os.path.dirname(source_file) python import calibre
src_dir = os.path.abspath(os.path.join(project_dir, 'src'))
base_dir = os.path.join(src_dir, 'calibre')
sys.path.insert(0, src_dir)
sys.resources_location = os.path.join(project_dir, 'resources')
sys.extensions_location = os.path.join(base_dir, 'plugins')
sys.executables_location = os.environ.get('CALIBRE_EXECUTABLES_PATH', '/usr/bin')
vipy.session.initialize(project_name='calibre', src_dir=src_dir,
project_dir=project_dir, base_dir=project_dir)
def recipe_title_callback(raw):
return eval(raw.decode('utf-8')).replace(' ', '_')
vipy.session.add_content_browser('<leader>r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY
nmap \log :enew<CR>:read ! bzr log -l 500 <CR>:e Changelog.yaml<CR>:e src/calibre/constants.py<CR>

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, socket, struct, subprocess
import os, socket, struct, subprocess, glob
from distutils.spawn import find_executable
from PyQt4 import pyqtconfig
@ -120,7 +120,7 @@ if iswindows:
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR', sw_lib_dir)
popplerqt4_lib_dirs = poppler_lib_dirs
poppler_libs = ['poppler']
magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.6.6')]
magick_inc_dirs = [os.path.join(prefix, 'build', 'ImageMagick-6.7.6')]
magick_lib_dirs = [os.path.join(magick_inc_dirs[0], 'VisualMagick', 'lib')]
magick_libs = ['CORE_RL_wand_', 'CORE_RL_magick_']
podofo_inc = os.path.join(sw_inc_dir, 'podofo')
@ -128,8 +128,9 @@ if iswindows:
elif isosx:
fc_inc = '/sw/include/fontconfig'
fc_lib = '/sw/lib'
poppler = glob.glob('/sw/build/poppler-*')[-1]
poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
'/sw/build/poppler-0.14.5/poppler:/sw/build/poppler-0.14.5')
'{0}/poppler:{0}'.format(poppler))
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
'/sw/lib')
poppler_libs = ['poppler']
@ -191,6 +192,9 @@ else:
lh = os.path.join(poppler_inc_dirs[0], 'Link.h')
if 'class AnnotLink' not in open(lh, 'rb').read():
poppler_cflags.append('-DPOPPLER_OLD_LINK_TYPE')
ph = os.path.join(poppler_inc_dirs[0], 'Page.h')
if 'getLinks(Catalog' in open(ph, 'rb').read():
poppler_cflags.append('-DPOPPLER_PRE_20')
magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],

View File

@ -22,7 +22,8 @@ Do not modify it unless you know what you are doing.
import sys, os
path = os.environ.get('CALIBRE_PYTHON_PATH', {path!r})
sys.path.insert(0, path)
if path not in sys.path:
sys.path.insert(0, path)
sys.resources_location = os.environ.get('CALIBRE_RESOURCES_PATH', {resources!r})
sys.extensions_location = os.environ.get('CALIBRE_EXTENSIONS_PATH', {extensions!r})

View File

@ -32,7 +32,7 @@ binary_includes = [
'/lib/libz.so.1',
'/usr/lib/libtiff.so.5',
'/lib/libbz2.so.1',
'/usr/lib/libpoppler.so.7',
'/usr/lib/libpoppler.so.25',
'/usr/lib/libxml2.so.2',
'/usr/lib/libopenjpeg.so.2',
'/usr/lib/libxslt.so.1',
@ -41,8 +41,8 @@ binary_includes = [
'/usr/lib/libgthread-2.0.so.0',
'/usr/lib/libpng14.so.14',
'/usr/lib/libexslt.so.0',
MAGICK_PREFIX+'/lib/libMagickWand.so.4',
MAGICK_PREFIX+'/lib/libMagickWand.so.5',
MAGICK_PREFIX+'/lib/libMagickCore.so.4',
MAGICK_PREFIX+'/lib/libMagickCore.so.5',
'/usr/lib/libgcrypt.so.11',
'/usr/lib/libgpg-error.so.0',
'/usr/lib/libphonon.so.4',

View File

@ -385,7 +385,7 @@ class Py2App(object):
@flush
def add_poppler(self):
info('\nAdding poppler')
for x in ('libpoppler.7.dylib',):
for x in ('libpoppler.25.dylib',):
self.install_dylib(os.path.join(SW, 'lib', x))
self.install_dylib(os.path.join(SW, 'bin', 'pdftohtml'), False)
@ -429,7 +429,7 @@ class Py2App(object):
def add_imagemagick(self):
info('\nAdding ImageMagick')
for x in ('Wand', 'Core'):
self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.4.dylib'%x))
self.install_dylib(os.path.join(SW, 'lib', 'libMagick%s.5.dylib'%x))
idir = glob.glob(os.path.join(SW, 'lib', 'ImageMagick-*'))[-1]
dest = os.path.join(self.frameworks_dir, 'ImageMagick')
if os.path.exists(dest):

View File

@ -18,7 +18,7 @@ QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.6.6',
IMAGEMAGICK = os.path.join(SW, 'build', 'ImageMagick-6.7.6',
'VisualMagick', 'bin')
CRT = r'C:\Microsoft.VC90.CRT'

View File

@ -295,7 +295,7 @@ NOTE: poppler must be built as a static library, unless you build the qt4 bindin
Now do the same for the pdftohtml project
cp poppler/*.h ~/sw/include/poppler && cp goo/*.h ~/sw/include/poppler/goo && cp splash/*.h ~/sw/include/poppler/splash && cp build/Release/poppler.lib ../../lib/ && cp build/utils/Release/*.exe ../../bin/
cp poppler/*.h ~/sw/include/poppler && cp goo/*.h ~/sw/include/poppler/goo && cp splash/*.h ~/sw/include/poppler/splash && cp build/Release/poppler.lib ../../lib/ && cp build/utils/Release/pdftohtml.exe ../../bin/
podofo
@ -336,6 +336,8 @@ Index: src/PdfFiltersPrivate.cpp
ImageMagick
--------------
Get the source from: http://www.imagemagick.org/download/windows/ImageMagick-windows.zip
Edit VisualMagick/configure/configure.cpp to set
int projectType = MULTITHREADEDDLL;
@ -349,7 +351,10 @@ Edit magick/magick-config.h
Undefine ProvideDllMain and MAGICKCORE_X11_DELEGATE
Now open VisualMagick/VisualDynamicMT.sln set to Release
Remove the CORE_xlib and UTIL_Imdisplay project CORE_Magick++
Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects.
F7 for build project, you will get one error due to the removal of xlib, ignore
it.
calibre
---------

View File

@ -12,14 +12,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-12 09:56+0000\n" "PO-Revision-Date: 2012-05-03 16:09+0000\n"
"Last-Translator: Dídac Rios <didac@niorcs.com>\n" "Last-Translator: Dídac Rios <didac@niorcs.com>\n"
"Language-Team: Catalan <linux@softcatala.org>\n" "Language-Team: Catalan <linux@softcatala.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-13 05:26+0000\n" "X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15070)\n" "X-Generator: Launchpad (build 15195)\n"
"Language: ca\n" "Language: ca\n"
#. name for aaa #. name for aaa
@ -9536,7 +9536,7 @@ msgstr "Ani"
#. name for hni #. name for hni
msgid "Hani" msgid "Hani"
msgstr "" msgstr "Haní"
#. name for hnj #. name for hnj
msgid "Hmong Njua" msgid "Hmong Njua"
@ -9544,7 +9544,7 @@ msgstr "Miao; Hmong Njua"
#. name for hnn #. name for hnn
msgid "Hanunoo" msgid "Hanunoo"
msgstr "" msgstr "Hanunoo"
#. name for hno #. name for hno
msgid "Hindko; Northern" msgid "Hindko; Northern"
@ -9552,35 +9552,35 @@ msgstr "Hindko; septentrional"
#. name for hns #. name for hns
msgid "Hindustani; Caribbean" msgid "Hindustani; Caribbean"
msgstr "" msgstr "Hindustaní; Caribeny"
#. name for hnu #. name for hnu
msgid "Hung" msgid "Hung"
msgstr "" msgstr "Hung"
#. name for hoa #. name for hoa
msgid "Hoava" msgid "Hoava"
msgstr "" msgstr "Hoava"
#. name for hob #. name for hob
msgid "Mari (Madang Province)" msgid "Mari (Madang Province)"
msgstr "" msgstr "Mari (Província de Madang)"
#. name for hoc #. name for hoc
msgid "Ho" msgid "Ho"
msgstr "" msgstr "Ho"
#. name for hod #. name for hod
msgid "Holma" msgid "Holma"
msgstr "" msgstr "Holma"
#. name for hoe #. name for hoe
msgid "Horom" msgid "Horom"
msgstr "" msgstr "Horom"
#. name for hoh #. name for hoh
msgid "Hobyót" msgid "Hobyót"
msgstr "" msgstr "Hobyot"
#. name for hoi #. name for hoi
msgid "Holikachuk" msgid "Holikachuk"
@ -9588,11 +9588,11 @@ msgstr "Holikachuk"
#. name for hoj #. name for hoj
msgid "Hadothi" msgid "Hadothi"
msgstr "Hadothi" msgstr "Harautí"
#. name for hol #. name for hol
msgid "Holu" msgid "Holu"
msgstr "Holu" msgstr "Holo"
#. name for hom #. name for hom
msgid "Homa" msgid "Homa"
@ -9628,11 +9628,11 @@ msgstr "Honi"
#. name for hoy #. name for hoy
msgid "Holiya" msgid "Holiya"
msgstr "" msgstr "Holiya"
#. name for hoz #. name for hoz
msgid "Hozo" msgid "Hozo"
msgstr "" msgstr "Hozo"
#. name for hpo #. name for hpo
msgid "Hpon" msgid "Hpon"
@ -9644,7 +9644,7 @@ msgstr "Hawaià Pidgin; llenguatge de signes"
#. name for hra #. name for hra
msgid "Hrangkhol" msgid "Hrangkhol"
msgstr "Hrangkhol" msgstr "Hrangkol"
#. name for hre #. name for hre
msgid "Hre" msgid "Hre"
@ -9668,7 +9668,7 @@ msgstr "Horuru"
#. name for hrt #. name for hrt
msgid "Hértevin" msgid "Hértevin"
msgstr "Hértevin" msgstr "Hertevin"
#. name for hru #. name for hru
msgid "Hruso" msgid "Hruso"
@ -9724,7 +9724,7 @@ msgstr "Hitu"
#. name for htx #. name for htx
msgid "Hittite; Middle" msgid "Hittite; Middle"
msgstr "Hittite; Middle" msgstr "Hittita; mitjà"
#. name for hub #. name for hub
msgid "Huambisa" msgid "Huambisa"
@ -9732,7 +9732,7 @@ msgstr "Huambisa"
#. name for huc #. name for huc
msgid "=/Hua" msgid "=/Hua"
msgstr "" msgstr "Hua"
#. name for hud #. name for hud
msgid "Huaulu" msgid "Huaulu"
@ -9740,7 +9740,7 @@ msgstr "Huaulu"
#. name for hue #. name for hue
msgid "Huave; San Francisco Del Mar" msgid "Huave; San Francisco Del Mar"
msgstr "Huave; San Francisco Del Mar" msgstr "Huave; San Francisco"
#. name for huf #. name for huf
msgid "Humene" msgid "Humene"
@ -9756,7 +9756,7 @@ msgstr "Huilliche"
#. name for hui #. name for hui
msgid "Huli" msgid "Huli"
msgstr "Huli" msgstr "Hulí"
#. name for huj #. name for huj
msgid "Miao; Northern Guiyang" msgid "Miao; Northern Guiyang"
@ -9808,7 +9808,7 @@ msgstr "Huitoto; Murui"
#. name for huv #. name for huv
msgid "Huave; San Mateo Del Mar" msgid "Huave; San Mateo Del Mar"
msgstr "Huave; San Mateo Del Mar" msgstr "Huave; San Mateo"
#. name for huw #. name for huw
msgid "Hukumina" msgid "Hukumina"
@ -9820,35 +9820,35 @@ msgstr "Huitoto; Nüpode"
#. name for huy #. name for huy
msgid "Hulaulá" msgid "Hulaulá"
msgstr "" msgstr "Arameu; Hulaula"
#. name for huz #. name for huz
msgid "Hunzib" msgid "Hunzib"
msgstr "" msgstr "Hunzib"
#. name for hvc #. name for hvc
msgid "Haitian Vodoun Culture Language" msgid "Haitian Vodoun Culture Language"
msgstr "" msgstr "Haitià Vodoun"
#. name for hve #. name for hve
msgid "Huave; San Dionisio Del Mar" msgid "Huave; San Dionisio Del Mar"
msgstr "" msgstr "Huave; San Dionisio"
#. name for hvk #. name for hvk
msgid "Haveke" msgid "Haveke"
msgstr "" msgstr "Haveke"
#. name for hvn #. name for hvn
msgid "Sabu" msgid "Sabu"
msgstr "" msgstr "Sabu"
#. name for hvv #. name for hvv
msgid "Huave; Santa María Del Mar" msgid "Huave; Santa María Del Mar"
msgstr "" msgstr "Huave; Santa Maria"
#. name for hwa #. name for hwa
msgid "Wané" msgid "Wané"
msgstr "" msgstr "Wané"
#. name for hwc #. name for hwc
msgid "Creole English; Hawai'i" msgid "Creole English; Hawai'i"
@ -9856,11 +9856,11 @@ msgstr "Anglès crioll; Hawaii"
#. name for hwo #. name for hwo
msgid "Hwana" msgid "Hwana"
msgstr "" msgstr "Hwana"
#. name for hya #. name for hya
msgid "Hya" msgid "Hya"
msgstr "" msgstr "Hya"
#. name for hye #. name for hye
msgid "Armenian" msgid "Armenian"
@ -9868,79 +9868,79 @@ msgstr "armeni"
#. name for iai #. name for iai
msgid "Iaai" msgid "Iaai"
msgstr "" msgstr "Iaai"
#. name for ian #. name for ian
msgid "Iatmul" msgid "Iatmul"
msgstr "" msgstr "Iatmulès"
#. name for iap #. name for iap
msgid "Iapama" msgid "Iapama"
msgstr "" msgstr "Iapama"
#. name for iar #. name for iar
msgid "Purari" msgid "Purari"
msgstr "" msgstr "Purari"
#. name for iba #. name for iba
msgid "Iban" msgid "Iban"
msgstr "" msgstr "Iban"
#. name for ibb #. name for ibb
msgid "Ibibio" msgid "Ibibio"
msgstr "" msgstr "Ibibio"
#. name for ibd #. name for ibd
msgid "Iwaidja" msgid "Iwaidja"
msgstr "" msgstr "Iwaidja"
#. name for ibe #. name for ibe
msgid "Akpes" msgid "Akpes"
msgstr "" msgstr "Akpes"
#. name for ibg #. name for ibg
msgid "Ibanag" msgid "Ibanag"
msgstr "" msgstr "Ibanag"
#. name for ibi #. name for ibi
msgid "Ibilo" msgid "Ibilo"
msgstr "" msgstr "Ibilo"
#. name for ibl #. name for ibl
msgid "Ibaloi" msgid "Ibaloi"
msgstr "" msgstr "Ibaloi"
#. name for ibm #. name for ibm
msgid "Agoi" msgid "Agoi"
msgstr "" msgstr "Agoi"
#. name for ibn #. name for ibn
msgid "Ibino" msgid "Ibino"
msgstr "" msgstr "Ibino"
#. name for ibo #. name for ibo
msgid "Igbo" msgid "Igbo"
msgstr "" msgstr "Ibo"
#. name for ibr #. name for ibr
msgid "Ibuoro" msgid "Ibuoro"
msgstr "" msgstr "Ibuoro"
#. name for ibu #. name for ibu
msgid "Ibu" msgid "Ibu"
msgstr "" msgstr "Ibu"
#. name for iby #. name for iby
msgid "Ibani" msgid "Ibani"
msgstr "" msgstr "Ibani"
#. name for ica #. name for ica
msgid "Ede Ica" msgid "Ede Ica"
msgstr "" msgstr "Ede Ica"
#. name for ich #. name for ich
msgid "Etkywan" msgid "Etkywan"
msgstr "" msgstr "Etkywan"
#. name for icl #. name for icl
msgid "Icelandic Sign Language" msgid "Icelandic Sign Language"
@ -9952,7 +9952,7 @@ msgstr "Anglès crioll; Islander"
#. name for ida #. name for ida
msgid "Idakho-Isukha-Tiriki" msgid "Idakho-Isukha-Tiriki"
msgstr "" msgstr "Idakho-Isukha-Tiriki"
#. name for idb #. name for idb
msgid "Indo-Portuguese" msgid "Indo-Portuguese"
@ -9960,15 +9960,15 @@ msgstr "Indo-portuguès"
#. name for idc #. name for idc
msgid "Idon" msgid "Idon"
msgstr "" msgstr "Idon"
#. name for idd #. name for idd
msgid "Ede Idaca" msgid "Ede Idaca"
msgstr "" msgstr "Ede Idaca"
#. name for ide #. name for ide
msgid "Idere" msgid "Idere"
msgstr "" msgstr "Idere"
#. name for idi #. name for idi
msgid "Idi" msgid "Idi"
@ -9976,43 +9976,43 @@ msgstr ""
#. name for ido #. name for ido
msgid "Ido" msgid "Ido"
msgstr "" msgstr "ido"
#. name for idr #. name for idr
msgid "Indri" msgid "Indri"
msgstr "" msgstr "Indri"
#. name for ids #. name for ids
msgid "Idesa" msgid "Idesa"
msgstr "" msgstr "Idesa"
#. name for idt #. name for idt
msgid "Idaté" msgid "Idaté"
msgstr "" msgstr "Idaté"
#. name for idu #. name for idu
msgid "Idoma" msgid "Idoma"
msgstr "" msgstr "Idoma"
#. name for ifa #. name for ifa
msgid "Ifugao; Amganad" msgid "Ifugao; Amganad"
msgstr "" msgstr "Ifugao; Amganad"
#. name for ifb #. name for ifb
msgid "Ifugao; Batad" msgid "Ifugao; Batad"
msgstr "" msgstr "Ifugao; Batad"
#. name for ife #. name for ife
msgid "Ifè" msgid "Ifè"
msgstr "" msgstr "Ifè"
#. name for iff #. name for iff
msgid "Ifo" msgid "Ifo"
msgstr "" msgstr "Ifo"
#. name for ifk #. name for ifk
msgid "Ifugao; Tuwali" msgid "Ifugao; Tuwali"
msgstr "" msgstr "Ifugao; Tuwali"
#. name for ifm #. name for ifm
msgid "Teke-Fuumu" msgid "Teke-Fuumu"
@ -10020,15 +10020,15 @@ msgstr "Teke; Fuumu"
#. name for ifu #. name for ifu
msgid "Ifugao; Mayoyao" msgid "Ifugao; Mayoyao"
msgstr "" msgstr "Ifugao; Mayoyao"
#. name for ify #. name for ify
msgid "Kallahan; Keley-I" msgid "Kallahan; Keley-I"
msgstr "" msgstr "Kallahan; Keley-I"
#. name for igb #. name for igb
msgid "Ebira" msgid "Ebira"
msgstr "" msgstr "Ebira"
#. name for ige #. name for ige
msgid "Igede" msgid "Igede"

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -8,14 +8,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-03-25 12:19+0000\n" "PO-Revision-Date: 2012-05-03 14:49+0000\n"
"Last-Translator: Radan Putnik <srastral@gmail.com>\n" "Last-Translator: Иван Старчевић <ivanstar61@gmail.com>\n"
"Language-Team: Serbian <gnu@prevod.org>\n" "Language-Team: Serbian <gnu@prevod.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-03-26 04:37+0000\n" "X-Launchpad-Export-Date: 2012-05-04 04:47+0000\n"
"X-Generator: Launchpad (build 15008)\n" "X-Generator: Launchpad (build 15195)\n"
"Language: sr\n" "Language: sr\n"
#. name for aaa #. name for aaa
@ -6152,7 +6152,7 @@ msgstr ""
#. name for deu #. name for deu
msgid "German" msgid "German"
msgstr "немачки" msgstr "Немачки"
#. name for dev #. name for dev
msgid "Domung" msgid "Domung"
@ -8416,7 +8416,7 @@ msgstr "ирски"
#. name for glg #. name for glg
msgid "Galician" msgid "Galician"
msgstr "" msgstr "Галицијски"
#. name for glh #. name for glh
msgid "Pashayi; Northwest" msgid "Pashayi; Northwest"
@ -8472,11 +8472,11 @@ msgstr ""
#. name for gmh #. name for gmh
msgid "German; Middle High (ca. 1050-1500)" msgid "German; Middle High (ca. 1050-1500)"
msgstr "" msgstr "Немачки; средње високи (ca. 1050-1500)"
#. name for gml #. name for gml
msgid "German; Middle Low" msgid "German; Middle Low"
msgstr "" msgstr "Немачки; средње низак"
#. name for gmm #. name for gmm
msgid "Gbaya-Mbodomo" msgid "Gbaya-Mbodomo"
@ -8792,7 +8792,7 @@ msgstr ""
#. name for gsg #. name for gsg
msgid "German Sign Language" msgid "German Sign Language"
msgstr "" msgstr "Немачки језик"
#. name for gsl #. name for gsl
msgid "Gusilay" msgid "Gusilay"
@ -8820,7 +8820,7 @@ msgstr ""
#. name for gsw #. name for gsw
msgid "German; Swiss" msgid "German; Swiss"
msgstr "" msgstr "Немачки ; Швајцарска"
#. name for gta #. name for gta
msgid "Guató" msgid "Guató"
@ -17954,7 +17954,7 @@ msgstr ""
#. name for nds #. name for nds
msgid "German; Low" msgid "German; Low"
msgstr "" msgstr "Немачки; низак"
#. name for ndt #. name for ndt
msgid "Ndunga" msgid "Ndunga"
@ -18778,7 +18778,7 @@ msgstr ""
#. name for nno #. name for nno
msgid "Norwegian Nynorsk" msgid "Norwegian Nynorsk"
msgstr "норвешки модерни" msgstr "Норвешки модерни"
#. name for nnp #. name for nnp
msgid "Naga; Wancho" msgid "Naga; Wancho"
@ -18830,7 +18830,7 @@ msgstr ""
#. name for nob #. name for nob
msgid "Norwegian Bokmål" msgid "Norwegian Bokmål"
msgstr "" msgstr "Норвешки (књижевни)"
#. name for noc #. name for noc
msgid "Nuk" msgid "Nuk"
@ -18886,7 +18886,7 @@ msgstr ""
#. name for nor #. name for nor
msgid "Norwegian" msgid "Norwegian"
msgstr "норвешки" msgstr "Норвешки"
#. name for nos #. name for nos
msgid "Nisu; Eastern" msgid "Nisu; Eastern"
@ -19066,7 +19066,7 @@ msgstr ""
#. name for nsl #. name for nsl
msgid "Norwegian Sign Language" msgid "Norwegian Sign Language"
msgstr "" msgstr "Норвешки језик"
#. name for nsm #. name for nsm
msgid "Naga; Sumi" msgid "Naga; Sumi"
@ -20406,7 +20406,7 @@ msgstr ""
#. name for pdc #. name for pdc
msgid "German; Pennsylvania" msgid "German; Pennsylvania"
msgstr "" msgstr "Немачки ; Пенсилванија"
#. name for pdi #. name for pdi
msgid "Pa Di" msgid "Pa Di"
@ -22086,7 +22086,7 @@ msgstr ""
#. name for rmg #. name for rmg
msgid "Norwegian; Traveller" msgid "Norwegian; Traveller"
msgstr "" msgstr "Норвешки; путнички"
#. name for rmh #. name for rmh
msgid "Murkim" msgid "Murkim"
@ -22871,7 +22871,7 @@ msgstr ""
#. name for sgg #. name for sgg
msgid "Swiss-German Sign Language" msgid "Swiss-German Sign Language"
msgstr "" msgstr "Швајцарско-Немачки језик"
#. name for sgh #. name for sgh
msgid "Shughni" msgid "Shughni"

View File

@ -10,14 +10,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-04-22 07:11+0000\n" "PO-Revision-Date: 2012-05-12 10:25+0000\n"
"Last-Translator: kulkke <Unknown>\n" "Last-Translator: kulkke <Unknown>\n"
"Language-Team: Turkish <gnome-turk@gnome.org>\n" "Language-Team: Turkish <gnome-turk@gnome.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-04-23 04:45+0000\n" "X-Launchpad-Export-Date: 2012-05-13 04:43+0000\n"
"X-Generator: Launchpad (build 15135)\n" "X-Generator: Launchpad (build 15225)\n"
"Language: tr\n" "Language: tr\n"
#. name for aaa #. name for aaa
@ -406,7 +406,7 @@ msgstr ""
#. name for aed #. name for aed
msgid "Argentine Sign Language" msgid "Argentine Sign Language"
msgstr "" msgstr "Arjantin İşaret Dili"
#. name for aee #. name for aee
msgid "Pashayi; Northeast" msgid "Pashayi; Northeast"
@ -1554,7 +1554,7 @@ msgstr "Dano"
#. name for asp #. name for asp
msgid "Algerian Sign Language" msgid "Algerian Sign Language"
msgstr "" msgstr "Cezayir İşaret Dili"
#. name for asq #. name for asq
msgid "Austrian Sign Language" msgid "Austrian Sign Language"
@ -2578,7 +2578,7 @@ msgstr "Blafe"
#. name for bfi #. name for bfi
msgid "British Sign Language" msgid "British Sign Language"
msgstr "" msgstr "Britanya İşaret Dili"
#. name for bfj #. name for bfj
msgid "Bafanji" msgid "Bafanji"
@ -4167,7 +4167,7 @@ msgstr "Bukat"
#. name for bvl #. name for bvl
msgid "Bolivian Sign Language" msgid "Bolivian Sign Language"
msgstr "" msgstr "Bolivya İşaret Dili"
#. name for bvm #. name for bvm
msgid "Bamunka" msgid "Bamunka"
@ -4587,7 +4587,7 @@ msgstr "Biri"
#. name for bzs #. name for bzs
msgid "Brazilian Sign Language" msgid "Brazilian Sign Language"
msgstr "" msgstr "Brezilya İşaret Dili"
#. name for bzt #. name for bzt
msgid "Brithenig" msgid "Brithenig"
@ -5623,11 +5623,11 @@ msgstr ""
#. name for csf #. name for csf
msgid "Cuba Sign Language" msgid "Cuba Sign Language"
msgstr "" msgstr "Küba İşaret Dili"
#. name for csg #. name for csg
msgid "Chilean Sign Language" msgid "Chilean Sign Language"
msgstr "" msgstr "Şili İşaret Dili"
#. name for csh #. name for csh
msgid "Chin; Asho" msgid "Chin; Asho"
@ -5651,7 +5651,7 @@ msgstr ""
#. name for csn #. name for csn
msgid "Colombian Sign Language" msgid "Colombian Sign Language"
msgstr "" msgstr "Kolombiya İşaret Dili"
#. name for cso #. name for cso
msgid "Chinantec; Sochiapan" msgid "Chinantec; Sochiapan"
@ -5663,7 +5663,7 @@ msgstr ""
#. name for csr #. name for csr
msgid "Costa Rican Sign Language" msgid "Costa Rican Sign Language"
msgstr "" msgstr "Kosta Rika İşaret Dili"
#. name for css #. name for css
msgid "Ohlone; Southern" msgid "Ohlone; Southern"
@ -7347,7 +7347,7 @@ msgstr ""
#. name for esl #. name for esl
msgid "Egypt Sign Language" msgid "Egypt Sign Language"
msgstr "" msgstr "Mısır İşaret Dili"
#. name for esm #. name for esm
msgid "Esuma" msgid "Esuma"
@ -7551,7 +7551,7 @@ msgstr ""
#. name for fcs #. name for fcs
msgid "Quebec Sign Language" msgid "Quebec Sign Language"
msgstr "" msgstr "Quebec İşaret Dili"
#. name for fer #. name for fer
msgid "Feroge" msgid "Feroge"
@ -8806,7 +8806,7 @@ msgstr ""
#. name for gsm #. name for gsm
msgid "Guatemalan Sign Language" msgid "Guatemalan Sign Language"
msgstr "" msgstr "Guatemala İşaret Dili"
#. name for gsn #. name for gsn
msgid "Gusan" msgid "Gusan"
@ -10895,7 +10895,7 @@ msgstr ""
#. name for jos #. name for jos
msgid "Jordanian Sign Language" msgid "Jordanian Sign Language"
msgstr "" msgstr "Ürdün İşaret Dili"
#. name for jow #. name for jow
msgid "Jowulu" msgid "Jowulu"
@ -13847,7 +13847,7 @@ msgstr ""
#. name for lbs #. name for lbs
msgid "Libyan Sign Language" msgid "Libyan Sign Language"
msgstr "" msgstr "Libya İşaret Dili"
#. name for lbt #. name for lbt
msgid "Lachi" msgid "Lachi"
@ -15591,7 +15591,7 @@ msgstr ""
#. name for mfs #. name for mfs
msgid "Mexican Sign Language" msgid "Mexican Sign Language"
msgstr "" msgstr "Meksika İşaret Dili"
#. name for mft #. name for mft
msgid "Mokerang" msgid "Mokerang"
@ -17055,7 +17055,7 @@ msgstr ""
#. name for mul #. name for mul
msgid "Multiple languages" msgid "Multiple languages"
msgstr "" msgstr "Çoklu diller"
#. name for mum #. name for mum
msgid "Maiwala" msgid "Maiwala"
@ -17867,7 +17867,7 @@ msgstr ""
#. name for ncs #. name for ncs
msgid "Nicaraguan Sign Language" msgid "Nicaraguan Sign Language"
msgstr "" msgstr "Nikaragua İşaret Dili"
#. name for nct #. name for nct
msgid "Naga; Chothe" msgid "Naga; Chothe"
@ -19495,7 +19495,7 @@ msgstr ""
#. name for nzs #. name for nzs
msgid "New Zealand Sign Language" msgid "New Zealand Sign Language"
msgstr "" msgstr "Yeni Zelanda İşaret Dili"
#. name for nzu #. name for nzu
msgid "Teke-Nzikou" msgid "Teke-Nzikou"
@ -21219,7 +21219,7 @@ msgstr ""
#. name for prl #. name for prl
msgid "Peruvian Sign Language" msgid "Peruvian Sign Language"
msgstr "" msgstr "Peru İşaret Dili"
#. name for prm #. name for prm
msgid "Kibiri" msgid "Kibiri"
@ -22699,7 +22699,7 @@ msgstr ""
#. name for sdl #. name for sdl
msgid "Saudi Arabian Sign Language" msgid "Saudi Arabian Sign Language"
msgstr "" msgstr "Suudi Arabistan İşaret Dili"
#. name for sdm #. name for sdm
msgid "Semandang" msgid "Semandang"
@ -22847,7 +22847,7 @@ msgstr ""
#. name for sfs #. name for sfs
msgid "South African Sign Language" msgid "South African Sign Language"
msgstr "" msgstr "Güney Afrika İşaret Dili"
#. name for sfw #. name for sfw
msgid "Sehwi" msgid "Sehwi"
@ -25943,7 +25943,7 @@ msgstr ""
#. name for tse #. name for tse
msgid "Tunisian Sign Language" msgid "Tunisian Sign Language"
msgstr "" msgstr "Tunus İşaret Dili"
#. name for tsf #. name for tsf
msgid "Tamang; Southwestern" msgid "Tamang; Southwestern"
@ -27348,7 +27348,7 @@ msgstr ""
#. name for vsl #. name for vsl
msgid "Venezuelan Sign Language" msgid "Venezuelan Sign Language"
msgstr "" msgstr "Venezuela İşaret Dili"
#. name for vsv #. name for vsv
msgid "Valencian Sign Language" msgid "Valencian Sign Language"
@ -28760,7 +28760,7 @@ msgstr ""
#. name for xms #. name for xms
msgid "Moroccan Sign Language" msgid "Moroccan Sign Language"
msgstr "" msgstr "Fas İşaret Dili"
#. name for xmt #. name for xmt
msgid "Matbat" msgid "Matbat"
@ -29540,7 +29540,7 @@ msgstr ""
#. name for yid #. name for yid
msgid "Yiddish" msgid "Yiddish"
msgstr "Yiddiş" msgstr "Yidiş"
#. name for yif #. name for yif
msgid "Ache" msgid "Ache"

View File

@ -26,7 +26,7 @@ def get_opts_from_parser(parser):
class Coffee(Command): # {{{ class Coffee(Command): # {{{
description = 'Compile coffeescript files into javascript' description = 'Compile coffeescript files into javascript'
COFFEE_DIRS = {'ebooks/oeb/display': 'display'} COFFEE_DIRS = ('ebooks/oeb/display',)
def add_options(self, parser): def add_options(self, parser):
parser.add_option('--watch', '-w', action='store_true', default=False, parser.add_option('--watch', '-w', action='store_true', default=False,
@ -47,47 +47,67 @@ class Coffee(Command): # {{{
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
def show_js(self, jsfile): def show_js(self, raw):
from pygments.lexers import JavascriptLexer from pygments.lexers import JavascriptLexer
from pygments.formatters import TerminalFormatter from pygments.formatters import TerminalFormatter
from pygments import highlight from pygments import highlight
with open(jsfile, 'rb') as f:
raw = f.read()
print highlight(raw, JavascriptLexer(), TerminalFormatter()) print highlight(raw, JavascriptLexer(), TerminalFormatter())
def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False): def do_coffee_compile(self, opts, timestamp=False, ignore_errors=False):
for toplevel, dest in self.COFFEE_DIRS.iteritems(): src_files = {}
dest = self.j(self.RESOURCES, dest) for src in self.COFFEE_DIRS:
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')): for f in glob.glob(self.j(self.SRC, __appname__, src,
js = self.j(dest, os.path.basename(x.rpartition('.')[0]+'.js')) '*.coffee')):
if self.newer(js, x): bn = os.path.basename(f).rpartition('.')[0]
arcname = src.replace('/', '.') + '.' + bn + '.js'
src_files[arcname] = (f, os.stat(f).st_mtime)
existing = {}
dest = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
if os.path.exists(dest):
with zipfile.ZipFile(dest, 'r') as zf:
for info in zf.infolist():
mtime = time.mktime(info.date_time + (0, 0, -1))
arcname = info.filename
if (arcname in src_files and src_files[arcname][1] <
mtime):
existing[arcname] = (zf.read(info), info)
todo = set(src_files) - set(existing)
updated = {}
for arcname in todo:
name = arcname.rpartition('.')[0]
print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if print ('\t%sCompiling %s'%(time.strftime('[%H:%M:%S] ') if
timestamp else '', os.path.basename(x))) timestamp else '', name))
src = src_files[arcname][0]
try: try:
cs = subprocess.check_output(self.compiler + js = subprocess.check_output(self.compiler +
[x]).decode('utf-8') [src]).decode('utf-8')
except Exception as e: except Exception as e:
print ('\n\tCompilation of %s failed'%os.path.basename(x)) print ('\n\tCompilation of %s failed'%name)
print (e) print (e)
if ignore_errors: if ignore_errors:
with open(js, 'wb') as f: js = u'# Compilation from coffeescript failed'
f.write('# Compilation from coffeescript failed')
else: else:
raise SystemExit(1) raise SystemExit(1)
else: else:
with open(js, 'wb') as f:
f.write(cs.encode('utf-8'))
if opts.show_js: if opts.show_js:
self.show_js(js) self.show_js(js)
print ('#'*80) print ('#'*80)
print ('#'*80) print ('#'*80)
zi = zipfile.ZipInfo()
zi.filename = arcname
zi.date_time = time.localtime()[:6]
updated[arcname] = (js.encode('utf-8'), zi)
if updated:
with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as zf:
for raw, zi in updated.itervalues():
zf.writestr(zi, raw)
for raw, zi in existing.itervalues():
zf.writestr(zi, raw)
def clean(self): def clean(self):
for toplevel, dest in self.COFFEE_DIRS.iteritems(): x = self.j(self.RESOURCES, 'compiled_coffeescript.zip')
dest = self.j(self.RESOURCES, dest)
for x in glob.glob(self.j(self.SRC, __appname__, toplevel, '*.coffee')):
x = x.rpartition('.')[0] + '.js'
x = self.j(dest, os.path.basename(x))
if os.path.exists(x): if os.path.exists(x):
os.remove(x) os.remove(x)
# }}} # }}}
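For context, a minimal sketch (not part of this commit) of how the compiled output could be read back at runtime, assuming the compiled_coffeescript.zip layout produced by do_coffee_compile above, where each entry is named after its source path (e.g. 'ebooks.oeb.display.cfi.js'):

# Hypothetical reader for the zip cache written by do_coffee_compile;
# the arcname scheme ('ebooks.oeb.display.<name>.js') is taken from this diff.
import os, zipfile

def load_compiled_js(resources_dir, arcname):
    zf = zipfile.ZipFile(os.path.join(resources_dir,
        'compiled_coffeescript.zip'), 'r')
    try:
        return zf.read(arcname)  # raw UTF-8 encoded javascript
    finally:
        zf.close()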

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, os, re, time, random, __builtin__, warnings import sys, os, re, time, random, __builtin__, warnings
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None) __builtin__.__dict__['dynamic_property'] = lambda func: func(None)
from math import floor from math import floor
from functools import partial from functools import partial
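The dynamic_property change above replaces Python 2's tuple-parameter syntax (lambda(func): ...), a syntax error on Python 3, with an equivalent plain one-argument lambda. A sketch of how the decorator is typically used (the Book class is invented for illustration): the decorated function is called once with None and must return a property built from its inner closures.

# Illustration only; dynamic_property is injected into __builtin__ above.
class Book(object):
    def __init__(self):
        self._title = u'Unknown'

    @dynamic_property
    def title(self):
        def fget(self):
            return self._title
        def fset(self, val):
            self._title = val.strip()
        return property(fget=fget, fset=fset)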

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 8, 49) numeric_version = (0, 8, 52)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -421,6 +421,16 @@ class EPUBMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.epub import set_metadata from calibre.ebooks.metadata.epub import set_metadata
set_metadata(stream, mi, apply_null=self.apply_null) set_metadata(stream, mi, apply_null=self.apply_null)
class FB2MetadataWriter(MetadataWriterPlugin):
name = 'Set FB2 metadata'
file_types = set(['fb2'])
description = _('Set metadata in %s files')%'FB2'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.fb2 import set_metadata
set_metadata(stream, mi, apply_null=self.apply_null)
class HTMLZMetadataWriter(MetadataWriterPlugin): class HTMLZMetadataWriter(MetadataWriterPlugin):
name = 'Set HTMLZ metadata' name = 'Set HTMLZ metadata'
@ -1321,15 +1331,15 @@ class StoreEbookscomStore(StoreBase):
formats = ['EPUB', 'LIT', 'MOBI', 'PDF'] formats = ['EPUB', 'LIT', 'MOBI', 'PDF']
affiliate = True affiliate = True
class StoreEBookShoppeUKStore(StoreBase): # class StoreEBookShoppeUKStore(StoreBase):
name = 'ebookShoppe UK' # name = 'ebookShoppe UK'
author = u'Charles Haley' # author = u'Charles Haley'
description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.' # description = u'We made this website in an attempt to offer the widest range of UK eBooks possible across and as many formats as we could manage.'
actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore' # actual_plugin = 'calibre.gui2.store.stores.ebookshoppe_uk_plugin:EBookShoppeUKStore'
#
headquarters = 'UK' # headquarters = 'UK'
formats = ['EPUB', 'PDF'] # formats = ['EPUB', 'PDF']
affiliate = True # affiliate = True
class StoreEHarlequinStore(StoreBase): class StoreEHarlequinStore(StoreBase):
name = 'eHarlequin' name = 'eHarlequin'
@ -1613,7 +1623,6 @@ plugins += [
StoreEbookNLStore, StoreEbookNLStore,
StoreEbookpointStore, StoreEbookpointStore,
StoreEbookscomStore, StoreEbookscomStore,
StoreEBookShoppeUKStore,
StoreEHarlequinStore, StoreEHarlequinStore,
StoreEKnigiStore, StoreEKnigiStore,
StoreEscapeMagazineStore, StoreEscapeMagazineStore,

View File

@ -295,3 +295,17 @@ class OutputFormatPlugin(Plugin):
return self.oeb.metadata.publication_type and \ return self.oeb.metadata.publication_type and \
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:') unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')
def specialize_css_for_output(self, log, opts, item, stylizer):
'''
Can be used to make changes to the css during the CSS flattening
process.
:param item: The item (HTML file) being processed
:param stylizer: A Stylizer object containing the flattened styles for
item. You can get the style for any element by
stylizer.style(element).
'''
pass
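A hedged sketch of an output plugin overriding the new hook (the plugin and its logging are invented; only the hook signature and stylizer.style() come from this commit — the real overrides for MOBI/AZW3 appear later in this diff):

# Invented example: inspect the flattened style of every <body> element.
from calibre.customize.conversion import OutputFormatPlugin

class ExampleOutput(OutputFormatPlugin):
    name = 'Example Output'
    file_type = 'example'

    def specialize_css_for_output(self, log, opts, item, stylizer):
        for body in item.data.iter('{http://www.w3.org/1999/xhtml}body'):
            log.debug('Body style in %s: %s' % (item.href,
                stylizer.style(body)))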

View File

@ -59,9 +59,7 @@ Run an embedded python interpreter.
'files and metadata, which you can edit using standard HTML ' 'files and metadata, which you can edit using standard HTML '
'editing tools, and then rebuilds the file from the edited HTML. ' 'editing tools, and then rebuilds the file from the edited HTML. '
'Makes no additional changes to the HTML, unlike a full calibre ' 'Makes no additional changes to the HTML, unlike a full calibre '
'conversion). Note that this tool will try to open the ' 'conversion).')
'folder containing the HTML files in the editor pointed to by the'
' EDITOR environment variable.')
parser.add_option('--test-build', help='Test binary modules in build', parser.add_option('--test-build', help='Test binary modules in build',
action='store_true', default=False) action='store_true', default=False)
@ -184,6 +182,12 @@ def main(args=sys.argv):
from calibre.constants import debug from calibre.constants import debug
debug() debug()
if len(args) > 2 and args[1] in ('-e', '--exec-file'): if len(args) > 2 and args[1] in ('-e', '--exec-file'):
# Load all plugins user defined plugins so the script can import from the
# calibre_plugins namespace
import calibre.customize.ui as dummy
dummy
sys.argv = [args[2]] + args[3:] sys.argv = [args[2]] + args[3:]
ef = os.path.abspath(args[2]) ef = os.path.abspath(args[2])
base = os.path.dirname(ef) base = os.path.dirname(ef)
@ -222,7 +226,7 @@ def main(args=sys.argv):
from calibre.utils.pyconsole.main import main from calibre.utils.pyconsole.main import main
main() main()
elif opts.command: elif opts.command:
sys.argv = args[:1] sys.argv = args
exec opts.command exec opts.command
elif opts.debug_device_driver: elif opts.debug_device_driver:
debug_device_driver() debug_device_driver()
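The -e/--exec-file change above means a script run via calibre-debug can now import from user plugins. A sketch (the plugin name and module layout are invented):

# myscript.py, run with: calibre-debug -e myscript.py
# The import works only because calibre-debug now loads all user plugins
# first, populating the calibre_plugins namespace; 'example_plugin' is an
# invented plugin name.
from calibre_plugins.example_plugin.ui import ExamplePlugin

print ExamplePlugin.name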

View File

@ -57,6 +57,7 @@ class ANDROID(USBMS):
0x4316 : [0x216], 0x4316 : [0x216],
0x42d6 : [0x216], 0x42d6 : [0x216],
0x42d7 : [0x216], 0x42d7 : [0x216],
0x42f7 : [0x216],
}, },
# Freescale # Freescale
0x15a2 : { 0x15a2 : {
@ -177,7 +178,7 @@ class ANDROID(USBMS):
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA', 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP', 'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C'] 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -193,7 +194,7 @@ class ANDROID(USBMS):
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855', 'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW', 'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T', 'P999DW',
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD'] 'GT-S5830L_CARD', 'UNIVERSE', 'XT875']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -201,7 +202,8 @@ class ANDROID(USBMS):
'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853', 'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD', 'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC', 'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER'] 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
'UMS_COMPOSITE']
OSX_MAIN_MEM = 'Android Device Main Memory' OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,10 @@ class POCKETBOOK360(EB600):
name = 'PocketBook 360 Device Interface' name = 'PocketBook 360 Device Interface'
gui_name = 'PocketBook 360' gui_name = 'PocketBook 360'
VENDOR_ID = [0x1f85, 0x525]
PRODUCT_ID = [0x1688, 0xa4a5]
BCD = [0x110]
FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt'] FORMATS = ['epub', 'fb2', 'prc', 'mobi', 'pdf', 'djvu', 'rtf', 'chm', 'txt']

View File

@ -13,6 +13,7 @@ import datetime, os, re, sys, json, hashlib
from calibre.devices.kindle.bookmark import Bookmark from calibre.devices.kindle.bookmark import Bookmark
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
from calibre import strftime from calibre import strftime
from calibre.utils.logging import default_log
''' '''
Notes on collections: Notes on collections:
@ -324,6 +325,7 @@ class KINDLE2(KINDLE):
OPT_APNX = 0 OPT_APNX = 0
OPT_APNX_ACCURATE = 1 OPT_APNX_ACCURATE = 1
OPT_APNX_CUST_COL = 2 OPT_APNX_CUST_COL = 2
THUMBNAIL_HEIGHT = 180
def formats_to_scan_for(self): def formats_to_scan_for(self):
ans = USBMS.formats_to_scan_for(self) | {'azw3'} ans = USBMS.formats_to_scan_for(self) | {'azw3'}
@ -375,8 +377,36 @@ class KINDLE2(KINDLE):
def upload_cover(self, path, filename, metadata, filepath): def upload_cover(self, path, filename, metadata, filepath):
''' '''
Hijacking this function to write the apnx file. Upload sidecar files: cover thumbnails and page count
''' '''
# Upload the cover thumbnail
try:
self.upload_kindle_thumbnail(metadata, filepath)
except:
import traceback
traceback.print_exc()
# Upload the apnx file
self.upload_apnx(filename, metadata, filepath)
def upload_kindle_thumbnail(self, metadata, filepath):
coverdata = getattr(metadata, 'thumbnail', None)
if not coverdata or not coverdata[2]:
return
thumb_dir = os.path.join(self._main_prefix, 'system', 'thumbnails')
if not os.path.exists(thumb_dir): return
from calibre.ebooks.mobi.reader.headers import MetadataHeader
with lopen(filepath, 'rb') as f:
mh = MetadataHeader(f, default_log)
if mh.exth is None or not mh.exth.uuid or not mh.exth.cdetype:
return
thumbfile = os.path.join(thumb_dir,
'thumbnail_{uuid}_{cdetype}_portrait.jpg'.format(
uuid=mh.exth.uuid, cdetype=mh.exth.cdetype))
with open(thumbfile, 'wb') as f:
f.write(coverdata[2])
def upload_apnx(self, filename, metadata, filepath):
from calibre.devices.kindle.apnx import APNXBuilder from calibre.devices.kindle.apnx import APNXBuilder
opts = self.settings() opts = self.settings()
@ -422,6 +452,9 @@ class KINDLE_DX(KINDLE2):
PRODUCT_ID = [0x0003] PRODUCT_ID = [0x0003]
BCD = [0x0100] BCD = [0x0100]
def upload_kindle_thumbnail(self, metadata, filepath):
pass
class KINDLE_FIRE(KINDLE2): class KINDLE_FIRE(KINDLE2):
name = 'Kindle Fire Device Interface' name = 'Kindle Fire Device Interface'
@ -440,4 +473,6 @@ class KINDLE_FIRE(KINDLE2):
VENDOR_NAME = 'AMAZON' VENDOR_NAME = 'AMAZON'
WINDOWS_MAIN_MEM = 'KINDLE' WINDOWS_MAIN_MEM = 'KINDLE'
def upload_kindle_thumbnail(self, metadata, filepath):
pass
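For reference, a standalone sketch of the sidecar path scheme that upload_kindle_thumbnail relies on (the mount point and identifiers are assumptions; the naming format is taken from the code above):

import os

def kindle_thumbnail_path(main_prefix, uuid, cdetype):
    # The Kindle firmware looks for cover thumbnails in system/thumbnails
    # using exactly this file name pattern (see upload_kindle_thumbnail).
    return os.path.join(main_prefix, 'system', 'thumbnails',
            'thumbnail_%s_%s_portrait.jpg' % (uuid, cdetype))

# e.g. kindle_thumbnail_path('/media/Kindle', some_uuid, 'EBOK')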

View File

@ -57,10 +57,11 @@ class PICO(NEWSMY):
gui_name = 'Pico' gui_name = 'Pico'
description = _('Communicate with the Pico reader.') description = _('Communicate with the Pico reader.')
VENDOR_NAME = ['TECLAST', 'IMAGIN', 'LASER-'] VENDOR_NAME = ['TECLAST', 'IMAGIN', 'LASER-', '']
WINDOWS_MAIN_MEM = ['USBDISK__USER', 'EB720'] WINDOWS_MAIN_MEM = ['USBDISK__USER', 'EB720']
EBOOK_DIR_MAIN = 'Books' EBOOK_DIR_MAIN = 'Books'
FORMATS = ['EPUB', 'FB2', 'TXT', 'LRC', 'PDB', 'PDF', 'HTML', 'WTXT'] FORMATS = ['EPUB', 'FB2', 'TXT', 'LRC', 'PDB', 'PDF', 'HTML', 'WTXT']
SCAN_FROM_ROOT = True
class IPAPYRUS(TECLAST_K3): class IPAPYRUS(TECLAST_K3):

View File

@ -155,7 +155,11 @@ class CHMReader(CHMFile):
self.hhc_path = f self.hhc_path = f
break break
if self.hhc_path not in files and files: if self.hhc_path not in files and files:
self.hhc_path = files[0] for f in files:
if f.partition('.')[-1].lower() in {'html', 'htm', 'xhtm',
'xhtml'}:
self.hhc_path = f
break
if self.hhc_path == '.hhc' and self.hhc_path not in files: if self.hhc_path == '.hhc' and self.hhc_path not in files:
from calibre import walk from calibre import walk
@ -165,6 +169,9 @@ class CHMReader(CHMFile):
self.hhc_path = os.path.relpath(x, output_dir) self.hhc_path = os.path.relpath(x, output_dir)
break break
if self.hhc_path not in files and files:
self.hhc_path = files[0]
def _reformat(self, data, htmlpath): def _reformat(self, data, htmlpath):
if self.input_encoding: if self.input_encoding:
data = data.decode(self.input_encoding) data = data.decode(self.input_encoding)
@ -241,7 +248,10 @@ class CHMReader(CHMFile):
except: except:
pass pass
# do not prettify, it would reformat the <pre> tags! # do not prettify, it would reformat the <pre> tags!
try:
return str(soup) return str(soup)
except RuntimeError:
return data
def Contents(self): def Contents(self):
if self._contents is not None: if self._contents is not None:

View File

@ -1,4 +1,25 @@
from __future__ import with_statement # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL 3' from __future__ import (unicode_literals, division, absolute_import,
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
class ConversionUserFeedBack(Exception):
def __init__(self, title, msg, level='info', det_msg=''):
''' Show a simple message to the user
:param title: The title (very short description)
:param msg: The message to show the user
:param level: Must be one of 'info', 'warn' or 'error'
:param det_msg: Optional detailed message to show the user
'''
import json
Exception.__init__(self, json.dumps({'msg':msg, 'level':level,
'det_msg':det_msg, 'title':title}))
self.title, self.msg, self.det_msg = title, msg, det_msg
self.level = level
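A usage sketch (the plugin context and format check are invented; only the exception API is from this commit). The ebook-convert handler for it is added further down in this diff:

# Somewhere inside a conversion plugin; '_' is calibre's builtin gettext.
from calibre.ebooks.conversion import ConversionUserFeedBack

def check_header(stream):
    if stream.read(4) != b'RIFF':  # invented format check
        raise ConversionUserFeedBack(
            _('Unsupported file'),
            _('This file has no valid header and cannot be converted.'),
            level='error')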

View File

@ -15,6 +15,7 @@ from calibre.utils.logging import Log
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from calibre import patheq from calibre import patheq
from calibre.ebooks.conversion import ConversionUserFeedBack
USAGE = '%prog ' + _('''\ USAGE = '%prog ' + _('''\
input_file output_file [options] input_file output_file [options]
@ -304,7 +305,10 @@ def read_sr_patterns(path, log=None):
def main(args=sys.argv): def main(args=sys.argv):
log = Log() log = Log()
parser, plumber = create_option_parser(args, log) parser, plumber = create_option_parser(args, log)
opts = parser.parse_args(args)[0] opts, leftover_args = parser.parse_args(args)
if len(leftover_args) > 3:
log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
return 1
for x in ('read_metadata_from_opf', 'cover'): for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None: if getattr(opts, x, None) is not None:
setattr(opts, x, abspath(getattr(opts, x))) setattr(opts, x, abspath(getattr(opts, x)))
@ -317,7 +321,16 @@ def main(args=sys.argv):
if n.dest] if n.dest]
plumber.merge_ui_recommendations(recommendations) plumber.merge_ui_recommendations(recommendations)
try:
plumber.run() plumber.run()
except ConversionUserFeedBack as e:
ll = {'info': log.info, 'warn': log.warn,
'error':log.error}.get(e.level, log.info)
ll(e.title)
if e.det_msg:
log.debug(e.det_msg)
ll(e.msg)
raise SystemExit(1)
log(_('Output saved to'), ' ', plumber.output) log(_('Output saved to'), ' ', plumber.output)

View File

@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
return False return False
def rationalize_cover(self, opf, log): def rationalize_cover(self, opf, log):
removed = None
from lxml import etree from lxml import etree
guide_cover, guide_elem = None, None guide_cover, guide_elem = None, None
for guide_elem in opf.iterguide(): for guide_elem in opf.iterguide():
@ -91,6 +92,7 @@ class EPUBInput(InputFormatPlugin):
# specially # specially
if not self.for_viewer: if not self.for_viewer:
spine[0].getparent().remove(spine[0]) spine[0].getparent().remove(spine[0])
removed = guide_cover
guide_elem.set('href', 'calibre_raster_cover.jpg') guide_elem.set('href', 'calibre_raster_cover.jpg')
from calibre.ebooks.oeb.base import OPF from calibre.ebooks.oeb.base import OPF
t = etree.SubElement(elem[0].getparent(), OPF('item'), t = etree.SubElement(elem[0].getparent(), OPF('item'),
@ -109,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
if renderer is not None: if renderer is not None:
open('calibre_raster_cover.jpg', 'wb').write( open('calibre_raster_cover.jpg', 'wb').write(
renderer) renderer)
return removed
def find_opf(self): def find_opf(self):
from lxml import etree from lxml import etree
@ -170,7 +173,7 @@ class EPUBInput(InputFormatPlugin):
for elem in opf.iterguide(): for elem in opf.iterguide():
elem.set('href', delta+elem.get('href')) elem.set('href', delta+elem.get('href'))
self.rationalize_cover(opf, log) self.removed_cover = self.rationalize_cover(opf, log)
self.optimize_opf_parsing = opf self.optimize_opf_parsing = opf
for x in opf.itermanifest(): for x in opf.itermanifest():
@ -198,3 +201,17 @@ class EPUBInput(InputFormatPlugin):
nopf.write(opf.render()) nopf.write(opf.render())
return os.path.abspath(u'content.opf') return os.path.abspath(u'content.opf')
def postprocess_book(self, oeb, opts, log):
rc = getattr(self, 'removed_cover', None)
if rc:
cover_toc_item = None
for item in oeb.toc.iterdescendants():
if item.href and item.href.partition('#')[0] == rc:
cover_toc_item = item
break
spine = {x.href for x in oeb.spine}
if (cover_toc_item is not None and cover_toc_item not in spine):
oeb.toc.item_that_refers_to_cover = cover_toc_item

View File

@ -312,13 +312,9 @@ class EPUBOutput(OutputFormatPlugin):
Perform various markup transforms to get the output to render correctly Perform various markup transforms to get the output to render correctly
in the quirky ADE. in the quirky ADE.
''' '''
from calibre.ebooks.oeb.base import XPath, XHTML, OEB_STYLES, barename, urlunquote from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote
stylesheet = None stylesheet = self.oeb.manifest.main_stylesheet
for item in self.oeb.manifest:
if item.media_type.lower() in OEB_STYLES:
stylesheet = item
break
# ADE cries big wet tears when it encounters an invalid fragment # ADE cries big wet tears when it encounters an invalid fragment
# identifier in the NCX toc. # identifier in the NCX toc.
@ -397,8 +393,14 @@ class EPUBOutput(OutputFormatPlugin):
for tag in XPath('//h:body/descendant::h:script')(root): for tag in XPath('//h:body/descendant::h:script')(root):
tag.getparent().remove(tag) tag.getparent().remove(tag)
formchildren = XPath('./h:input|./h:button|./h:textarea|'
'./h:label|./h:fieldset|./h:legend')
for tag in XPath('//h:form')(root): for tag in XPath('//h:form')(root):
if formchildren(tag):
tag.getparent().remove(tag) tag.getparent().remove(tag)
else:
# Not a real form
tag.tag = XHTML('div')
for tag in XPath('//h:center')(root): for tag in XPath('//h:center')(root):
tag.tag = XHTML('div') tag.tag = XHTML('div')

View File

@ -12,7 +12,7 @@ class MOBIInput(InputFormatPlugin):
name = 'MOBI Input' name = 'MOBI Input'
author = 'Kovid Goyal' author = 'Kovid Goyal'
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML' description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
file_types = set(['mobi', 'prc', 'azw', 'azw3']) file_types = set(['mobi', 'prc', 'azw', 'azw3', 'pobi'])
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):

View File

@ -232,6 +232,10 @@ class MOBIOutput(OutputFormatPlugin):
writer(oeb, output_path) writer(oeb, output_path)
extract_mobi(output_path, opts) extract_mobi(output_path, opts)
def specialize_css_for_output(self, log, opts, item, stylizer):
from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup
CSSCleanup(log, opts)(item, stylizer)
class AZW3Output(OutputFormatPlugin): class AZW3Output(OutputFormatPlugin):
name = 'AZW3 Output' name = 'AZW3 Output'
@ -254,9 +258,6 @@ class AZW3Output(OutputFormatPlugin):
recommended_value=False, level=OptionRecommendation.LOW, recommended_value=False, level=OptionRecommendation.LOW,
help=_('Disable compression of the file contents.') help=_('Disable compression of the file contents.')
), ),
OptionRecommendation(name='personal_doc', recommended_value='[PDOC]',
help=_('Tag marking book to be filed with Personal Docs')
),
OptionRecommendation(name='mobi_toc_at_start', OptionRecommendation(name='mobi_toc_at_start',
recommended_value=False, recommended_value=False,
help=_('When adding the Table of Contents to the book, add it at the start of the ' help=_('When adding the Table of Contents to the book, add it at the start of the '
@ -298,4 +299,8 @@ class AZW3Output(OutputFormatPlugin):
kf8.write(output_path) kf8.write(output_path)
extract_mobi(output_path, opts) extract_mobi(output_path, opts)
def specialize_css_for_output(self, log, opts, item, stylizer):
from calibre.ebooks.mobi.writer8.cleanup import CSSCleanup
CSSCleanup(log, opts)(item, stylizer)

View File

@ -99,12 +99,8 @@ class PDFOutput(OutputFormatPlugin):
# Remove page-break-before on <body> element as it causes # Remove page-break-before on <body> element as it causes
# blank pages in PDF Output # blank pages in PDF Output
from calibre.ebooks.oeb.base import OEB_STYLES, XPath from calibre.ebooks.oeb.base import XPath
stylesheet = None stylesheet = self.oeb.manifest.main_stylesheet
for item in self.oeb.manifest:
if item.media_type.lower() in OEB_STYLES:
stylesheet = item
break
if stylesheet is not None: if stylesheet is not None:
from cssutils.css import CSSRule from cssutils.css import CSSRule
classes = set(['.calibre']) classes = set(['.calibre'])

View File

@ -4,6 +4,7 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, re, sys, shutil, pprint import os, re, sys, shutil, pprint
from functools import partial
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.customize.ui import input_profiles, output_profiles, \ from calibre.customize.ui import input_profiles, output_profiles, \
@ -342,21 +343,25 @@ OptionRecommendation(name='remove_fake_margins',
OptionRecommendation(name='margin_top', OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default. ' help=_('Set the top margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_bottom', OptionRecommendation(name='margin_bottom',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the bottom margin in pts. Default is %default. ' help=_('Set the bottom margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_left', OptionRecommendation(name='margin_left',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the left margin in pts. Default is %default. ' help=_('Set the left margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='margin_right', OptionRecommendation(name='margin_right',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the right margin in pts. Default is %default. ' help=_('Set the right margin in pts. Default is %default. '
'Setting this to less than zero will cause no margin to be set. '
'Note: 72 pts equals 1 inch')), 'Note: 72 pts equals 1 inch')),
OptionRecommendation(name='change_justification', OptionRecommendation(name='change_justification',
@ -884,7 +889,10 @@ OptionRecommendation(name='search_replace',
self.log.debug('Resolved conversion options') self.log.debug('Resolved conversion options')
try: try:
self.log.debug('calibre version:', __version__) self.log.debug('calibre version:', __version__)
self.log.debug(pprint.pformat(self.opts.__dict__)) odict = dict(self.opts.__dict__)
for x in ('username', 'password'):
odict.pop(x, None)
self.log.debug(pprint.pformat(odict))
except: except:
self.log.exception('Failed to get resolved conversion options') self.log.exception('Failed to get resolved conversion options')
@ -1010,6 +1018,13 @@ OptionRecommendation(name='search_replace',
pr(0.35) pr(0.35)
self.flush() self.flush()
if self.output_plugin.file_type != 'epub':
# Remove the toc reference to the html cover, if any, except for
# epub, as the epub output plugin will do the right thing with it.
item = getattr(self.oeb.toc, 'item_that_refers_to_cover', None)
if item is not None and item.count() == 0:
self.oeb.toc.remove(item)
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size fbase = self.opts.base_font_size
if fbase < 1e-4: if fbase < 1e-4:
@ -1061,7 +1076,9 @@ OptionRecommendation(name='search_replace',
untable=self.output_plugin.file_type in ('mobi','lit'), untable=self.output_plugin.file_type in ('mobi','lit'),
unfloat=self.output_plugin.file_type in ('mobi', 'lit'), unfloat=self.output_plugin.file_type in ('mobi', 'lit'),
page_break_on_body=self.output_plugin.file_type in ('mobi', page_break_on_body=self.output_plugin.file_type in ('mobi',
'lit')) 'lit'),
specializer=partial(self.output_plugin.specialize_css_for_output,
self.log, self.opts))
flattener(self.oeb, self.opts) flattener(self.oeb, self.opts)
self.opts.insert_blank_line = oibl self.opts.insert_blank_line = oibl

View File

@ -148,7 +148,7 @@ class HeuristicProcessor(object):
return wordcount.words return wordcount.words
def markup_italicis(self, html): def markup_italicis(self, html):
self.log.debug("\n\n\nitalicize debugging \n\n\n") #self.log.debug("\n\n\nitalicize debugging \n\n\n")
ITALICIZE_WORDS = [ ITALICIZE_WORDS = [
'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.', 'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.',
'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.', 'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.',
@ -184,6 +184,9 @@ class HeuristicProcessor(object):
except OverflowError: except OverflowError:
# match.group(0) was too large to be compiled into a regex # match.group(0) was too large to be compiled into a regex
continue continue
except re.error:
# the match was not a valid regular expression
continue
return html return html

View File

@ -113,6 +113,11 @@ class HTMLFile(object):
raise IOError(msg) raise IOError(msg)
raise IgnoreFile(msg, err.errno) raise IgnoreFile(msg, err.errno)
if not src:
if level == 0:
raise ValueError('The file %s is empty'%self.path)
self.is_binary = True
if not self.is_binary: if not self.is_binary:
if not encoding: if not encoding:
encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1] encoding = detect_xml_encoding(src[:4096], verbose=verbose)[1]

View File

@ -5,11 +5,15 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>, '\
'2008, Anatoly Shipitsin <norguhtar at gmail.com>' '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
'''Read meta information from fb2 files''' '''Read meta information from fb2 files'''
import os import os, random, datetime
import datetime
from functools import partial from functools import partial
from string import ascii_letters, digits
from base64 import b64encode
from lxml import etree from lxml import etree
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
from calibre.utils.magick.draw import save_cover_data_to
from calibre import guess_type, guess_all_extensions, prints, force_unicode from calibre import guess_type, guess_all_extensions, prints, force_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn from calibre.ebooks.metadata import MetaInformation, check_isbn
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
@ -22,6 +26,12 @@ NAMESPACES = {
XPath = partial(etree.XPath, namespaces=NAMESPACES) XPath = partial(etree.XPath, namespaces=NAMESPACES)
tostring = partial(etree.tostring, method='text', encoding=unicode) tostring = partial(etree.tostring, method='text', encoding=unicode)
def FB2(tag):
return '{%s}%s'%(NAMESPACES['fb2'], tag)
def XLINK(tag):
return '{%s}%s'%(NAMESPACES['xlink'], tag)
def get_metadata(stream): def get_metadata(stream):
''' Return fb2 metadata as a L{MetaInformation} object ''' ''' Return fb2 metadata as a L{MetaInformation} object '''
@ -85,6 +95,7 @@ def _parse_authors(root):
authors = [] authors = []
# pick up authors but only from one section, <title-info>; otherwise it is not consistent! # pick up authors but only from one section, <title-info>; otherwise it is not consistent!
# Those are fallbacks: <src-title-info>, <document-info> # Those are fallbacks: <src-title-info>, <document-info>
author = None
for author_sec in ['title-info', 'src-title-info', 'document-info']: for author_sec in ['title-info', 'src-title-info', 'document-info']:
for au in XPath('//fb2:%s/fb2:author'%author_sec)(root): for au in XPath('//fb2:%s/fb2:author'%author_sec)(root):
author = _parse_author(au) author = _parse_author(au)
@ -211,8 +222,8 @@ def _parse_publisher(root, mi):
def _parse_pubdate(root, mi): def _parse_pubdate(root, mi):
year = XPath('number(//fb2:publish-info/fb2:year/text())')(root) year = XPath('number(//fb2:publish-info/fb2:year/text())')(root)
if float.is_integer(year): if float.is_integer(year):
# only year is available, so use 1-st of Jan # only year is available, so use 2nd of June
mi.pubdate = datetime.date(int(year), 1, 1) mi.pubdate = datetime.date(int(year), 6, 2)
def _parse_timestamp(root, mi): def _parse_timestamp(root, mi):
#<date value="1996-12-03">03.12.1996</date> #<date value="1996-12-03">03.12.1996</date>
@ -239,3 +250,135 @@ def _get_fbroot(stream):
raw = xml_to_unicode(raw, strip_encoding_pats=True)[0] raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
root = etree.fromstring(raw, parser=parser) root = etree.fromstring(raw, parser=parser)
return root return root
def _clear_meta_tags(doc, tag):
for parent in ('title-info', 'src-title-info', 'publish-info'):
for x in XPath('//fb2:%s/fb2:%s'%(parent, tag))(doc):
x.getparent().remove(x)
def _set_title(title_info, mi):
if not mi.is_null('title'):
_clear_meta_tags(title_info, 'book-title')
title = _get_or_create(title_info, 'book-title')
title.text = mi.title
def _text2fb2(parent, text):
lines = text.split('\n')
for line in lines:
line = line.strip()
if line:
p = _create_tag(parent, 'p', at_start=False)
p.text = line
else:
_create_tag(parent, 'empty-line', at_start=False)
def _set_comments(title_info, mi):
if not mi.is_null('comments'):
from calibre.utils.html2text import html2text
_clear_meta_tags(title_info, 'annotation')
title = _get_or_create(title_info, 'annotation')
_text2fb2(title, html2text(mi.comments))
def _set_authors(title_info, mi):
if not mi.is_null('authors'):
_clear_meta_tags(title_info, 'author')
for author in mi.authors:
author_parts = author.split()
if not author_parts: continue
atag = _create_tag(title_info, 'author')
if len(author_parts) == 1:
_create_tag(atag, 'nickname').text = author
else:
_create_tag(atag, 'first-name').text = author_parts[0]
author_parts = author_parts[1:]
if len(author_parts) > 1:
_create_tag(atag, 'middle-name', at_start=False).text = author_parts[0]
author_parts = author_parts[1:]
if author_parts:
_create_tag(atag, 'last-name', at_start=False).text = ' '.join(author_parts)
def _set_tags(title_info, mi):
if not mi.is_null('tags'):
_clear_meta_tags(title_info, 'genre')
for t in mi.tags:
tag = _create_tag(title_info, 'genre')
tag.text = t
def _set_series(title_info, mi):
if not mi.is_null('series'):
_clear_meta_tags(title_info, 'sequence')
seq = _get_or_create(title_info, 'sequence')
seq.set('name', mi.series)
try:
seq.set('number', '%g'%mi.series_index)
except:
seq.set('number', '1')
def _rnd_name(size=8, chars=ascii_letters + digits):
return ''.join(random.choice(chars) for x in range(size))
def _rnd_pic_file_name(prefix='calibre_cover_', size=32, ext='jpg'):
return prefix + _rnd_name(size=size) + '.' + ext
def _encode_into_jpeg(data):
data = save_cover_data_to(data, 'cover.jpg', return_data=True)
return b64encode(data)
def _set_cover(title_info, mi):
if not mi.is_null('cover_data') and mi.cover_data[1]:
coverpage = _get_or_create(title_info, 'coverpage')
cim_tag = _get_or_create(coverpage, 'image')
if XLINK('href') in cim_tag.attrib:
cim_filename = cim_tag.attrib[XLINK('href')][1:]
else:
cim_filename = _rnd_pic_file_name('cover')
cim_tag.attrib[XLINK('href')] = '#' + cim_filename
fb2_root = cim_tag.getroottree().getroot()
cim_binary = _get_or_create(fb2_root, 'binary', attribs={'id': cim_filename}, at_start=False)
cim_binary.attrib['content-type'] = 'image/jpeg'
cim_binary.text = _encode_into_jpeg(mi.cover_data[1])
def _create_tag(parent, tag, attribs={}, at_start=True):
ans = parent.makeelement(FB2(tag))
ans.attrib.update(attribs)
if at_start:
parent.insert(0, ans)
else:
parent.append(ans)
return ans
def _get_or_create(parent, tag, attribs={}, at_start=True):
xpathstr='./fb2:'+tag
for n, v in attribs.items():
xpathstr += '[@%s="%s"]' % (n, v)
ans = XPath(xpathstr)(parent)
if ans:
ans = ans[0]
else:
ans = _create_tag(parent, tag, attribs, at_start)
return ans
def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
stream.seek(0)
root = _get_fbroot(stream)
desc = _get_or_create(root, 'description')
ti = _get_or_create(desc, 'title-info')
indent = ti.text
_set_comments(ti, mi)
_set_series(ti, mi)
_set_tags(ti, mi)
_set_authors(ti, mi)
_set_title(ti, mi)
_set_cover(ti, mi)
for child in ti:
child.tail = indent
stream.seek(0)
stream.truncate()
stream.write(etree.tostring(root, method='xml', encoding='utf-8',
xml_declaration=True))
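A usage sketch for the new writer (the file name and metadata values are invented):

from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.fb2 import set_metadata

mi = Metadata(u'New Title', [u'Jane Doe'])
with open('book.fb2', 'r+b') as stream:  # invented file name
    set_metadata(stream, mi)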

View File

@ -18,7 +18,7 @@ from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase, from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
fixauthors) fixauthors)
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date from calibre.utils.date import parse_only_date
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
class Worker(Thread): # Get details {{{ class Worker(Thread): # Get details {{{
@ -471,7 +471,7 @@ class Worker(Thread): # Get details {{{
ans = x.tail ans = x.tail
date = ans.rpartition('(')[-1].replace(')', '').strip() date = ans.rpartition('(')[-1].replace(')', '').strip()
date = self.delocalize_datestr(date) date = self.delocalize_datestr(date)
return parse_date(date, assume_utc=True) return parse_only_date(date, assume_utc=True)
def parse_language(self, pd): def parse_language(self, pd):
for x in reversed(pd.xpath(self.language_xpath)): for x in reversed(pd.xpath(self.language_xpath)):

View File

@ -6,7 +6,6 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re import re
import datetime
from urllib import quote_plus from urllib import quote_plus
from Queue import Queue, Empty from Queue import Queue, Empty
@ -14,6 +13,7 @@ from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_only_date
class Ozon(Source): class Ozon(Source):
name = 'OZON.ru' name = 'OZON.ru'
@ -454,9 +454,7 @@ def toPubdate(log, yearAsString): # {{{
res = None res = None
if yearAsString: if yearAsString:
try: try:
year = int(yearAsString) res = parse_only_date(yearAsString)
# only year is available, so use 1-st of Jan
res = datetime.datetime(year, 1, 1)
except: except:
log.error('cannot parse to date %s'%yearAsString) log.error('cannot parse to date %s'%yearAsString)
return res return res
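Illustration of the change (value invented): parse_only_date turns a bare year string into a full date, matching the "2nd of June" placeholder convention adopted by the FB2 reader above — an assumption based on this diff.

# Mirrors the toPubdate() call above.
from calibre.utils.date import parse_only_date
pubdate = parse_only_date(u'2005')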

View File

@ -306,6 +306,11 @@ class MOBIHeader(object): # {{{
self.extra_data_flags = 0 self.extra_data_flags = 0
if self.has_extra_data_flags: if self.has_extra_data_flags:
self.unknown4 = self.raw[184:192] self.unknown4 = self.raw[184:192]
if self.file_version < 8:
self.first_text_record, self.last_text_record = \
struct.unpack_from(b'>HH', self.raw, 192)
self.fdst_count, = struct.unpack_from(b'>L', self.raw, 196)
else:
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL', self.fdst_idx, self.fdst_count = struct.unpack_from(b'>LL',
self.raw, 192) self.raw, 192)
if self.fdst_count <= 1: if self.fdst_count <= 1:
@ -409,6 +414,10 @@ class MOBIHeader(object): # {{{
a('DRM Flags: %r'%self.drm_flags) a('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags: if self.has_extra_data_flags:
a('Unknown4: %r'%self.unknown4) a('Unknown4: %r'%self.unknown4)
if hasattr(self, 'first_text_record'):
a('First content record: %d'%self.first_text_record)
a('Last content record: %d'%self.last_text_record)
else:
r('FDST Index', 'fdst_idx') r('FDST Index', 'fdst_idx')
a('FDST Count: %d'% self.fdst_count) a('FDST Count: %d'% self.fdst_count)
r('FCIS number', 'fcis_number') r('FCIS number', 'fcis_number')
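The header layout assumed by the new pre-KF8 branch above, as a standalone sketch (raw stands for the MOBI header bytes):

import struct

def read_text_record_range(raw):
    # For MOBI file_version < 8: offsets 192-195 hold two big-endian
    # uint16s (first/last text record) and offset 196 a big-endian uint32
    # (FDST count), matching the parsing added above.
    first, last = struct.unpack_from(b'>HH', raw, 192)
    fdst_count, = struct.unpack_from(b'>L', raw, 196)
    return first, last, fdst_count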

View File

@ -159,7 +159,7 @@ class NCXIndex(Index):
if self.table is not None: if self.table is not None:
NCXEntry = namedtuple('NCXEntry', 'index start length depth parent ' NCXEntry = namedtuple('NCXEntry', 'index start length depth parent '
'first_child last_child title pos_fid') 'first_child last_child title pos_fid kind')
for num, x in enumerate(self.table.iteritems()): for num, x in enumerate(self.table.iteritems()):
text, tag_map = x text, tag_map = x
@ -192,7 +192,7 @@ class NCXIndex(Index):
length=e['len'], depth=e['hlvl'], parent=refindx(e, length=e['len'], depth=e['hlvl'], parent=refindx(e,
'parent'), first_child=refindx(e, 'child1'), 'parent'), first_child=refindx(e, 'child1'),
last_child=refindx(e, 'childn'), title=e['text'], last_child=refindx(e, 'childn'), title=e['text'],
pos_fid=e['pos_fid']) pos_fid=e['pos_fid'], kind=e['kind'])
self.records.append(entry) self.records.append(entry)

View File

@@ -189,11 +189,11 @@ class MOBIFile(object):
     def read_tbs(self):
         from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC,
                 collect_indexing_data, encode_strands_as_sequences,
-                sequences_to_bytes)
+                sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
         entry_map = []
         for index in self.ncx_index:
             vals = list(index)[:-1] + [None, None, None, None]
-            entry_map.append(Entry(*vals))
+            entry_map.append(Entry(*(vals[:12])))
         indexing_data = collect_indexing_data(entry_map, list(map(len,
@@ -206,6 +206,14 @@ class MOBIFile(object):
             the start of the text record.
             ''')]
+
+        tbs_type = 8
+        try:
+            calculate_all_tbs(indexing_data)
+        except NegativeStrandIndex:
+            calculate_all_tbs(indexing_data, tbs_type=5)
+            tbs_type = 5
+
         for i, strands in enumerate(indexing_data):
             rec = self.text_records[i]
             tbs_bytes = rec.trailing_data.get('indexing', b'')
@@ -236,8 +244,12 @@ class MOBIFile(object):
                 desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1]))
             if tbs_bytes:
                 desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
-            calculated_sequences = encode_strands_as_sequences(strands)
-            calculated_bytes = sequences_to_bytes(calculated_sequences)
+            calculated_sequences = encode_strands_as_sequences(strands,
+                    tbs_type=tbs_type)
+            try:
+                calculated_bytes = sequences_to_bytes(calculated_sequences)
+            except:
+                calculated_bytes = b'failed to calculate tbs bytes'
             if calculated_bytes != otbs:
                 print ('WARNING: TBS mismatch for record %d'%i)
                 desc.append('WARNING: TBS mismatch!')
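
The control flow being mirrored here, with the writer8.tbs names stubbed out (calculate_all_tbs and NegativeStrandIndex are real per the import above; the bodies below are invented placeholders): try type-8 trailing byte sequences first, and fall back to type 5 when the strand indices go negative.

    class NegativeStrandIndex(Exception):
        pass

    def calculate_all_tbs(indexing_data, tbs_type=8):
        # Placeholder: pretend type 8 cannot encode negative strand indices.
        if tbs_type == 8 and any(i < 0 for i in indexing_data):
            raise NegativeStrandIndex()
        return [(tbs_type, i) for i in indexing_data]

    def choose_tbs_type(indexing_data):
        tbs_type = 8
        try:
            calculate_all_tbs(indexing_data)
        except NegativeStrandIndex:
            calculate_all_tbs(indexing_data, tbs_type=5)
            tbs_type = 5
        return tbs_type

    print(choose_tbs_type([1, -2, 3]))  # 5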

@@ -45,6 +45,10 @@ class EXTHHeader(object): # {{{
             elif idx == 202:
                 self.thumbnail_offset, = struct.unpack('>L', content)
             elif idx == 501:
+                try:
+                    self.cdetype = content.decode('ascii')
+                except UnicodeDecodeError:
+                    self.cdetype = None
                 # cdetype
                 if content == b'EBSP':
                     if not self.mi.tags:
@@ -109,8 +113,11 @@ class EXTHHeader(object): # {{{
                         self.mi.isbn = raw
                 except:
                     pass
-            elif idx == 113:
-                pass # ASIN or UUID
+            elif idx == 113: # ASIN or other id
+                try:
+                    self.uuid = content.decode('ascii')
+                except:
+                    self.uuid = None
             elif idx == 116:
                 self.start_offset, = struct.unpack(b'>L', content)
             elif idx == 121:
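
Both new branches follow the same defensive pattern: EXTH record 501 (cdetype) and record 113 (ASIN or other id) carry ASCII payloads that are occasionally garbage, so a failed decode degrades to None rather than aborting the metadata read. Reduced to a standalone function:

    def decode_exth_ascii(idx, content):
        fields = {}
        if idx == 501:    # cdetype
            try:
                fields['cdetype'] = content.decode('ascii')
            except UnicodeDecodeError:
                fields['cdetype'] = None
        elif idx == 113:  # ASIN or other id
            try:
                fields['uuid'] = content.decode('ascii')
            except UnicodeDecodeError:
                fields['uuid'] = None
        return fields

    print(decode_exth_ascii(501, b'EBOK'))  # {'cdetype': 'EBOK'}
    print(decode_exth_ascii(113, b'\xff'))  # {'uuid': None}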

@@ -111,7 +111,11 @@ def update_flow_links(mobi8_reader, resource_map, log):
             continue
         if not isinstance(flow, unicode):
-            flow = flow.decode(mr.header.codec)
+            try:
+                flow = flow.decode(mr.header.codec)
+            except UnicodeDecodeError:
+                log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
+                flow = flow.decode(mr.header.codec, 'replace')
         # links to raster image files from image tags
         # image_pattern
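
The same decode-then-degrade idea in isolation, stdlib only (the codec value is illustrative): strict decoding is attempted first, and on failure the bytes are re-decoded with errors='replace' so one bad byte cannot abort the whole flow part.

    def decode_flow(flow, codec='utf-8', log_error=print):
        if isinstance(flow, bytes):
            try:
                flow = flow.decode(codec)
            except UnicodeDecodeError:
                log_error('Flow part has invalid %s encoded bytes' % codec)
                flow = flow.decode(codec, 'replace')
        return flow

    print(decode_flow(b'ok \xff'))  # 'ok \ufffd' after the fallback fires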

@@ -207,9 +207,9 @@ class Mobi8Reader(object):
                 fname = 'svgimg' + nstr + '.svg'
             else:
                 # search for CDATA and if exists inline it
-                if flowpart.find('[CDATA[') >= 0:
+                if flowpart.find(b'[CDATA[') >= 0:
                     typ = 'css'
-                    flowpart = '<style type="text/css">\n' + flowpart + '\n</style>\n'
+                    flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
                     format = 'inline'
                     dir = None
                     fname = None
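
Why the b'' prefixes matter: flowpart is still a byte string at this point, and with unicode_literals in effect (as in the new file later in this diff) bare string literals are unicode, so searching and wrapping must stay in bytes to avoid implicit coercion errors. The fixed logic, runnable on its own:

    flowpart = b'/* [CDATA[ */ body { color: black } /* ]] */'
    if flowpart.find(b'[CDATA[') >= 0:
        flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
    print(flowpart.decode('ascii'))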

@@ -31,6 +31,10 @@ def do_explode(path, dest):
     with CurrentDir(dest):
         mr = Mobi8Reader(mr, default_log)
         opf = os.path.abspath(mr())
+        try:
+            os.remove('debug-raw.html')
+        except:
+            pass
     return opf
@@ -52,7 +56,10 @@ def explode(path, dest, question=lambda x:True):
     kf8_type = header.kf8_type
     if kf8_type is None:
-        raise BadFormat('This MOBI file does not contain a KF8 format book')
+        raise BadFormat(_('This MOBI file does not contain a KF8 format '
+            'book. KF8 is the new format from Amazon. calibre can '
+            'only tweak MOBI files that contain KF8 books. Older '
+            'MOBI files without KF8 are not tweakable.'))
     if kf8_type == 'joint':
         if not question(_('This MOBI file contains both KF8 and '
@@ -64,6 +71,14 @@ def explode(path, dest, question=lambda x:True):
     return fork_job('calibre.ebooks.mobi.tweak', 'do_explode', args=(path,
         dest), no_output=True)['result']

+def set_cover(oeb):
+    if 'cover' not in oeb.guide or oeb.metadata['cover']: return
+    cover = oeb.guide['cover']
+    if cover.href in oeb.manifest.hrefs:
+        item = oeb.manifest.hrefs[cover.href]
+        oeb.metadata.clear('cover')
+        oeb.metadata.add('cover', item.id)
+
 def do_rebuild(opf, dest_path):
     plumber = Plumber(opf, dest_path, default_log)
     plumber.setup_options()
@@ -72,6 +87,7 @@ def do_rebuild(opf, dest_path):
     plumber.opts.mobi_passthrough = True
     oeb = create_oebbook(default_log, opf, plumber.opts)
+    set_cover(oeb)
     outp.convert(oeb, dest_path, inp, plumber.opts, default_log)

 def rebuild(src_dir, dest_path):
@@ -79,6 +95,8 @@ def rebuild(src_dir, dest_path):
     if not opf:
         raise ValueError('No OPF file found in %s'%src_dir)
     opf = opf[0]
+    # For debugging, uncomment the following line
+    # def fork_job(a, b, args=None, no_output=True): do_rebuild(*args)
     fork_job('calibre.ebooks.mobi.tweak', 'do_rebuild', args=(opf, dest_path),
         no_output=True)
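
A hedged usage sketch of the round trip these hunks harden (file names are examples; explode and rebuild are the entry points shown in the diff, and BadFormat is assumed importable from the same module): explode now raises a descriptive BadFormat for non-KF8 input, and rebuild restores a guide-declared cover into the metadata via set_cover.

    from calibre.ebooks.mobi.tweak import explode, rebuild, BadFormat

    try:
        opf = explode('book.azw3', 'exploded_dir')
        # ... edit the files referenced by the returned OPF ...
        rebuild('exploded_dir', 'book-tweaked.azw3')
    except BadFormat as e:
        print(e)  # e.g. the file contains no KF8 book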

@@ -382,6 +382,7 @@ class MobiWriter(object):
         first_image_record = len(self.records)
         self.resources.serialize(self.records, used_images)
         resource_record_count = len(self.records) - old
+        last_content_record = len(self.records) - 1

         # FCIS/FLIS (Seems to serve no purpose)
         flis_number = len(self.records)
@@ -406,7 +407,7 @@ class MobiWriter(object):
         # header
         header_fields['first_resource_record'] = first_image_record
         header_fields['exth_flags'] = 0b100001010000 # Kindlegen uses this
-        header_fields['fdst_record'] = NULL_INDEX
+        header_fields['fdst_record'] = pack(b'>HH', 1, last_content_record)
         header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
         header_fields['flis_record'] = flis_number
         header_fields['fcis_record'] = fcis_number
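
What the changed header field amounts to: in MOBI 6 output, the 4-byte slot previously written as NULL_INDEX is reused as two big-endian 16-bit values, the first and last content record (the reader-side counterpart of this is the header change earlier in this diff). Values below are illustrative.

    from struct import pack, unpack

    last_content_record = 37
    fdst_record = pack(b'>HH', 1, last_content_record)
    assert len(fdst_record) == 4            # same width as the 32-bit NULL_INDEX
    assert unpack(b'>HH', fdst_record) == (1, 37)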

@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.oeb.base import XPath
+
+class CSSCleanup(object):
+
+    def __init__(self, log, opts):
+        self.log, self.opts = log, opts
+
+    def __call__(self, item, stylizer):
+        if not hasattr(item.data, 'xpath'): return
+
+        # The Kindle touch displays all black pages if the height is set on
+        # body
+        for body in XPath('//h:body')(item.data):
+            style = stylizer.style(body)
+            style.drop('height')

@@ -56,7 +56,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
                     items][:1]
             else:
                 creators = [unicode(c) for c in items]
-                items = ['; '.join(creators)]
+                items = creators
         for item in items:
             data = unicode(item)
             if term != 'description':
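
Effect of the one-line change: instead of a single EXTH 100 (creator) record holding 'A; B', one record is emitted per creator. A sketch of the resulting record layout, assuming the standard EXTH framing of a 4-byte type plus a 4-byte length that includes the 8-byte header:

    import struct

    def creator_records(creators):
        records = []
        for name in creators:
            data = name.encode('utf-8')
            records.append(struct.pack(b'>II', 100, len(data) + 8) + data)
        return records

    print(len(creator_records(['First Author', 'Second Author'])))  # 2 records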

@@ -316,6 +316,9 @@ class NCXIndex(Index):
             desc = entry.get('description', None)
             if desc:
                 strings.append(desc)
+            kind = entry.get('kind', None)
+            if kind:
+                strings.append(kind)
         self.cncx = CNCX(strings)

         def to_entry(x):
@@ -324,7 +327,7 @@ class NCXIndex(Index):
                     'first_child', 'last_child'):
                 if f in x:
                     ans[f] = x[f]
-            for f in ('label', 'description', 'author'):
+            for f in ('label', 'description', 'author', 'kind'):
                 if f in x:
                     ans[f] = self.cncx[x[f]]
             return ('%02x'%x['index'], ans)
@@ -333,3 +336,20 @@
+
+
+class NonLinearNCXIndex(NCXIndex):
+    control_byte_count = 2
+    tag_types = tuple(map(TagMeta, (
+        ('offset', 1, 1, 1, 0),
+        ('length', 2, 1, 2, 0),
+        ('label', 3, 1, 4, 0),
+        ('depth', 4, 1, 8, 0),
+        ('kind', 5, 1, 16, 0),
+        ('parent', 21, 1, 32, 0),
+        ('first_child', 22, 1, 64, 0),
+        ('last_child', 23, 1, 128, 0),
+        EndTagTable,
+        ('pos_fid', 6, 2, 1, 0),
+        EndTagTable
+    )))
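
Reading the tag table above: each row is assumed to be (name, tag number, values per entry, bitmask, end flag), and each EndTagTable closes one control byte. The eight one-bit masks fill the first control byte, so pos_fid spills into a second, hence control_byte_count = 2. A sketch of how per-entry control bytes would be assembled from such masks:

    def control_bytes(present_fields, tables):
        # tables: one (name, bitmask) list per control byte
        out = []
        for table in tables:
            byte = 0
            for name, mask in table:
                if name in present_fields:
                    byte |= mask
            out.append(byte)
        return out

    tables = [[('offset', 1), ('length', 2), ('label', 4), ('depth', 8),
               ('kind', 16), ('parent', 32), ('first_child', 64),
               ('last_child', 128)],
              [('pos_fid', 1)]]
    print(control_bytes({'offset', 'length', 'label', 'depth', 'pos_fid'},
                        tables))  # [15, 1]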

@@ -25,7 +25,7 @@ from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
 from calibre.ebooks.oeb.parse_utils import barename
 from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
 from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
-        ChunkIndex, GuideIndex)
+        ChunkIndex, GuideIndex, NonLinearNCXIndex)
 from calibre.ebooks.mobi.writer8.mobi import KF8Book
 from calibre.ebooks.mobi.writer8.tbs import apply_trailing_byte_sequences
 from calibre.ebooks.mobi.writer8.toc import TOCAdder
@@ -314,11 +314,10 @@ class KF8Writer(object):
             return

         # Flatten the ToC into a depth first list
-        fl = toc.iter() if is_periodical else toc.iterdescendants()
+        fl = toc.iterdescendants()
         for i, item in enumerate(fl):
-            entry = {'id': id(item), 'index': i, 'href':item.href,
-                    'label':(item.title or _('Unknown')),
-                    'children':[]}
+            entry = {'id': id(item), 'index': i, 'label':(item.title or
+                _('Unknown')), 'children':[]}
             entry['depth'] = getattr(item, 'ncx_hlvl', 0)
             p = getattr(item, 'ncx_parent', None)
             if p is not None:
@@ -333,14 +332,45 @@ class KF8Writer(object):
             if item.description:
                 entry['description'] = item.description
             entries.append(entry)
+            href = item.href or ''
+            href, frag = href.partition('#')[0::2]
+            aid = self.id_map.get((href, frag), None)
+            if aid is None:
+                aid = self.id_map.get((href, ''), None)
+            if aid is None:
+                pos, fid = 0, 0
+                chunk = self.chunk_table[pos]
+                offset = chunk.insert_pos + fid
+            else:
+                pos, fid, offset = self.aid_offset_map[aid]
+            entry['pos_fid'] = (pos, fid)
+            entry['offset'] = offset

         # The Kindle requires entries to be sorted by (depth, playorder)
-        entries.sort(key=lambda entry: (entry['depth'], entry['index']))
+        # However, I cannot figure out how to deal with non linear ToCs, i.e.
+        # ToCs whose nth entry at depth d has an offset after its n+k entry at
+        # the same depth, so we sort on (depth, offset) instead. This re-orders
+        # the ToC to be linear. A non-linear ToC causes section to section
+        # jumping to not work. kindlegen somehow handles non-linear tocs, but I
+        # cannot figure out how.
+        original = sorted(entries,
+                key=lambda entry: (entry['depth'], entry['index']))
+        linearized = sorted(entries,
+                key=lambda entry: (entry['depth'], entry['offset']))
+        is_non_linear = original != linearized
+        entries = linearized
+        is_non_linear = False # False as we are using the linearized entries
+
+        if is_non_linear:
+            for entry in entries:
+                entry['kind'] = 'chapter'
+
         for i, entry in enumerate(entries):
             entry['index'] = i
         id_to_index = {entry['id']:entry['index'] for entry in entries}

-        # Write the hierarchical and start offset information
+        # Write the hierarchical information
         for entry in entries:
             children = entry.pop('children')
             if children:
@@ -348,19 +378,6 @@ class KF8Writer(object):
                 entry['last_child'] = id_to_index[children[-1]]
             if 'parent_id' in entry:
                 entry['parent'] = id_to_index[entry.pop('parent_id')]
-            href = entry.pop('href')
-            href, frag = href.partition('#')[0::2]
-            aid = self.id_map.get((href, frag), None)
-            if aid is None:
-                aid = self.id_map.get((href, ''), None)
-            if aid is None:
-                pos, fid = 0, 0
-            else:
-                pos, fid = self.aid_offset_map[aid]
-            chunk = self.chunk_table[pos]
-            offset = chunk.insert_pos + fid
-            entry['pos_fid'] = (pos, fid)
-            entry['offset'] = offset

         # Write the lengths
         def get_next_start(entry):
@@ -369,13 +386,13 @@ class KF8Writer(object):
             if enders:
                 return min(enders)
             return len(self.flows[0])
-
         for entry in entries:
             entry['length'] = get_next_start(entry) - entry['offset']

         self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
                 self.uncompressed_record_lengths)
-        self.ncx_records = NCXIndex(entries)()
+        idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
+        self.ncx_records = idx_type(entries)()

     def create_guide(self):
         self.start_offset = None
@@ -389,12 +406,9 @@ class KF8Writer(object):
             aid = self.id_map.get((href, ''))
             if aid is None:
                 continue
-            pos, fid = self.aid_offset_map[aid]
+            pos, fid, offset = self.aid_offset_map[aid]
             if is_guide_ref_start(ref):
-                chunk = self.chunk_table[pos]
-                skel = [s for s in self.skel_table if s.file_number ==
-                        chunk.file_number][0]
-                self.start_offset = skel.start_pos + skel.length + chunk.start_pos + fid
+                self.start_offset = offset
             self.guide_table.append(GuideRef(ref.title or
                 _('Unknown'), ref.type, (pos, fid)))
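
The linearity test added above, reduced to toy data: a ToC is non-linear when sorting by declared play order (depth, index) disagrees with sorting by position in the text (depth, offset). The writer then ships the linearized order, since section-to-section jumping breaks on non-linear ToCs.

    entries = [
        {'depth': 1, 'index': 0, 'offset': 500},  # declared first, occurs later
        {'depth': 1, 'index': 1, 'offset': 100},
    ]
    original = sorted(entries, key=lambda e: (e['depth'], e['index']))
    linearized = sorted(entries, key=lambda e: (e['depth'], e['offset']))
    print(original != linearized)  # True -> this ToC is non-linear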

@@ -138,6 +138,8 @@ class MOBIHeader(Header): # {{{
     unknown2 = zeroes(8)

     # 192: FDST
+    # In MOBI 6 the fdst record is instead two two-byte fields storing the
+    # indices of the first and last content records
     fdst_record = DYN
     fdst_count = DYN

Some files were not shown because too many files have changed in this diff.