Sync to trunk.
170
Changelog.yaml
@ -19,6 +19,176 @@
|
||||
# new recipes:
|
||||
# - title:
|
||||
|
||||
- version: 0.7.57
|
||||
date: 2011-04-22
|
||||
|
||||
new features:
|
||||
- title: "Launch worker processes on demand instead of keeping a pool of them in memory. Reduces memory footprint."
|
||||
|
||||
- title: "Use the visual formatting of the Table of Contents to try to automatically create a multi-level TOC when converting/viewing MOBI files."
|
||||
tickets: [763681]
|
||||
|
||||
- title: "Add a new function booksize() to the template language to get the value of the size column in calibre."
|
||||
|
||||
- title: "Add support for using metadata plugboards with the content server (only with the epub format)"
|
||||
|
||||
- title: "Change default algorithm for automatically computing author sort to be more intelligent and handle the case when the author name has a comma in it"
|
||||
|
||||
- title: "Show cover size in the tooltips of the book details panel and book details popup window"
|
||||
|
||||
bug fixes:
|
||||
- title: "Dragging and dropping a cover onto the book details panel did not change the cover size"
|
||||
tickets: [768332]
|
||||
|
||||
- title: "Fix non-escaped '|' when searching for commas in authors using REGEXP_MATCH"
|
||||
|
||||
- title: "Fix ratings in templates being multiplied by 2"
|
||||
|
||||
- title: "Fix adding a comma to custom series values when using completion."
|
||||
tickets: [763788]
|
||||
|
||||
- title: "CHM Input: Another workaround for a Microsoft mess."
|
||||
tickets: [763336]
|
||||
|
||||
- title: "Fix job count in the spinner not always being updated when a job completes"
|
||||
|
||||
- title: "Changing case only of a title does not update title sort"
|
||||
tickets: [768904]
|
||||
|
||||
improved recipes:
|
||||
- ecuisine.ro, egirl.ro and tabu.ro
|
||||
- Daily Telegraph
|
||||
- Handelsblatt
|
||||
- Il Sole 24 Ore
|
||||
- Newsweek
|
||||
- Arcamax
|
||||
|
||||
new recipes:
|
||||
- title: BabyOnline.ro
|
||||
author: Silviu Cotoara
|
||||
|
||||
- title: "The Journal.ie"
|
||||
author: Phil Burns
|
||||
|
||||
- title: "Der Spiegel"
|
||||
author: Nikolas Mangold
|
||||
|
||||
- version: 0.7.56
|
||||
date: 2011-04-17
|
||||
|
||||
new features:
|
||||
- title: "This is primarily a bug fix release that fixes a bug in 0.7.55 that caused calibre to rescan the files on the device every time the device is connected. If you updated to 0.7.55 it is highly recommended you update to 0.7.56"
|
||||
|
||||
- title: "Device driver for Coby Kyros"
|
||||
|
||||
- title: "Remove the quick access to search options from next to the search bar, as we now have a separate search highlights toggle button"
|
||||
|
||||
- title: "MOBI Output: Ensure that MOBI files always have 8KB worth of null bytes at the end of record 0. This appears to be necessary for Amazon to be able to add DRM to calibre generated MOBI files sent to their publishing service."
|
||||
|
||||
- title: "Add a tool to inspect MOBI files. To use: calibre-debug -m file.mobi"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fixed regression taht caused calibre to rescan files on the device on every reconnect"
|
||||
|
||||
- title: "Fix donate button causing the toolbar to be too large on OS X"
|
||||
|
||||
- title: "MOBI Input: Fix detection of Table of Contents for MOBI files that have a page break between the location designated as the Table of Contents and the actual table of contents."
|
||||
tickets: [763504]
|
||||
|
||||
- title: "Comic Input: Fix handling of some CBZ files that have wrongly encoded non ASCII filenames on windows."
|
||||
tickets: [763280]
|
||||
|
||||
- title: "PML Input: Fix multi-line chapter title causing a spurious page break"
|
||||
tickets: [763238]
|
||||
|
||||
- title: "EPUB Input: Speed up processing of files with very large manifest/spines"
|
||||
|
||||
- title: "Fix regression that broke cover:False searches in 0.7.55"
|
||||
|
||||
improved recipes:
|
||||
- Suedduetsche Zeitung
|
||||
- Irish Times
|
||||
- Big Oven
|
||||
- NSPM
|
||||
|
||||
|
||||
- version: 0.7.55
|
||||
date: 2011-04-15
|
||||
|
||||
new features:
|
||||
- title: "Add a menu bar. Useful if you use a lot of plugins and are running out of space in your toolbars. By default the menu bar is hidden (except on OS X). You can add actions to it via Preferences->Toolbars. As soon as you add actions, it will become visible."
|
||||
|
||||
- title: "OS X: Make the main calibre window look a little more 'native' on OS X"
|
||||
|
||||
- title: "Show recently viewed books in the View button's drop down menu"
|
||||
|
||||
- title: "Add a button next to the search bar to toggle easily between highlight and restrict search modes"
|
||||
|
||||
- title: "Allow the use of arbitrary searches as search restrictions, rather than just saved searches. Do this by using the special entry '*Current Search' in the Search Restriction dropdown."
|
||||
|
||||
- title: "The Connect/share icon now changes color to indicate that the content server is running"
|
||||
tickets: [755444]
|
||||
|
||||
- title: "Device drivers for Viewpad 7, Motorola Xoom and Asus Eee Note"
|
||||
|
||||
- title: "Add tags like composite custom column."
|
||||
tickets: [759663]
|
||||
|
||||
- title: "Add a new date format code 'iso'. Permits formatting dates to see the complete time (via Preferences->Tweaks)"
|
||||
|
||||
- title: "Allow the use of data from the size column in the template language"
|
||||
tickets: [759645]
|
||||
|
||||
- title: "Support reading/writing covers to txtz/htmlz files"
|
||||
|
||||
- title: "Speedup for large library sorting when using composite custom columns"
|
||||
|
||||
- title: "Move the boolean columns are tristate tweak to Preferences->Behavior"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix a regression in 0.7.54 that broke reading covers/metadata from cbz files."
|
||||
tickets: [756892]
|
||||
|
||||
- title: "Fix tweak names and help not translatable"
|
||||
tickets: [756736]
|
||||
|
||||
- title: "When the size of a book is less that 0.1MB but not zero, display the size as <0.1 instead of 0.0."
|
||||
tickets: [755768]
|
||||
|
||||
- title: "HTMLZ input: Fix handling of HTML files encoded in an encoding other than UTF-8"
|
||||
|
||||
- title: "EPUB Input: Fix EPUB files with empty Adobe PAGE templates causing conversion to abort."
|
||||
tickets: [760390]
|
||||
|
||||
- title: "Fix CHM input plugin not closing opened input file"
|
||||
tickets: [760589]
|
||||
|
||||
- title: "MOBI Output: Make super/subscripts use a slightly smaller font when rendered on a Kindle. Also allow the use of vertical-align:top/bottom in the CSS to specify a super/subscript."
|
||||
tickets: [758667]
|
||||
|
||||
- title: "LRF Input: Detect and workaround LRF files that have deeply nested spans, instead of crashing."
|
||||
tickets: [759680]
|
||||
|
||||
- title: "MOBI Output: Fix bug that would cause conversion to unneccessarily abort when malformed hyperlinks are present in the input document."
|
||||
tickets: [759313]
|
||||
|
||||
- title: "Make true and false searches work correctly for numeric fields."
|
||||
|
||||
- title: "MOBI Output: The Ignore margins setting no longer ignores blockquotes, only margins set via CSS on other elements."
|
||||
tickets: [758675]
|
||||
|
||||
- title: "Fix regression that caused clicking auto send to also change the email address in Preferences->Email"
|
||||
|
||||
improved recipes:
|
||||
- Wall Street Journal
|
||||
- Weblogs SL
|
||||
- Tabu.ro
|
||||
- Vecernje Novosti
|
||||
|
||||
new recipes:
|
||||
- title: Hallo Assen and Dvhn
|
||||
author: Reijendert
|
||||
|
||||
|
||||
- version: 0.7.54
|
||||
date: 2011-04-08
|
||||
|
@ -6,12 +6,13 @@ __copyright__ = 'Copyright 2010 Starson17'
|
||||
www.arcamax.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class Arcamax(BasicNewsRecipe):
|
||||
title = 'Arcamax'
|
||||
__author__ = 'Starson17'
|
||||
__version__ = '1.03'
|
||||
__date__ = '25 November 2010'
|
||||
__version__ = '1.04'
|
||||
__date__ = '18 April 2011'
|
||||
description = u'Family Friendly Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
||||
category = 'news, comics'
|
||||
language = 'en'
|
||||
@ -30,8 +31,15 @@ class Arcamax(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['toon']}),
|
||||
]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['comics-header']}),
|
||||
dict(name='b', attrs={'class':['current']}),
|
||||
dict(name='article', attrs={'class':['comic']}),
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id':['comicfull' ]}),
|
||||
dict(name='div', attrs={'class':['calendar' ]}),
|
||||
dict(name='nav', attrs={'class':['calendar-nav' ]}),
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
@ -71,7 +79,6 @@ class Arcamax(BasicNewsRecipe):
|
||||
#(u"Rugrats", u"http://www.arcamax.com/rugrats"),
|
||||
(u"Speed Bump", u"http://www.arcamax.com/speedbump"),
|
||||
(u"Wizard of Id", u"http://www.arcamax.com/wizardofid"),
|
||||
(u"Dilbert", u"http://www.arcamax.com/dilbert"),
|
||||
(u"Zits", u"http://www.arcamax.com/zits"),
|
||||
]:
|
||||
articles = self.make_links(url)
|
||||
@ -86,24 +93,37 @@ class Arcamax(BasicNewsRecipe):
|
||||
for page in pages:
|
||||
page_soup = self.index_to_soup(url)
|
||||
if page_soup:
|
||||
title = page_soup.find(name='div', attrs={'class':'toon'}).p.img['alt']
|
||||
title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
|
||||
page_url = url
|
||||
prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'next'}, text='Previous').parent['href']
|
||||
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
|
||||
# orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
|
||||
prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
|
||||
date = self.tag_to_string(page_soup.find(name='b', attrs={'class':['current']}))
|
||||
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date': date})
|
||||
url = prev_page_url
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
main_comic = soup.find('p',attrs={'class':'m0'})
|
||||
if main_comic.a['target'] == '_blank':
|
||||
main_comic.a.img['id'] = 'main_comic'
|
||||
for img_tag in soup.findAll('img'):
|
||||
parent_tag = img_tag.parent
|
||||
if parent_tag.name == 'a':
|
||||
new_tag = Tag(soup,'p')
|
||||
new_tag.insert(0,img_tag)
|
||||
parent_tag.replaceWith(new_tag)
|
||||
elif parent_tag.name == 'p':
|
||||
if not self.tag_to_string(parent_tag) == '':
|
||||
new_div = Tag(soup,'div')
|
||||
new_tag = Tag(soup,'p')
|
||||
new_tag.insert(0,img_tag)
|
||||
parent_tag.replaceWith(new_div)
|
||||
new_div.insert(0,new_tag)
|
||||
new_div.insert(1,parent_tag)
|
||||
return soup
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
img#main_comic {max-width:100%; min-width:100%;}
|
||||
img {max-width:100%; min-width:100%;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
59
recipes/babyonline.recipe
Normal file
@ -0,0 +1,59 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
||||
'''
|
||||
babyonline.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BabyOnline(BasicNewsRecipe):
|
||||
title = u'Baby Online'
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'De la p\u0103rinte la p\u0103rinte'
|
||||
publisher = u'Baby Online'
|
||||
oldest_article = 50
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Reviste,Copii,Mame'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.babyonline.ro/images/default/logo.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'article_container'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'bar_nav'}),
|
||||
dict(name='div', attrs={'id':'service_send'}),
|
||||
dict(name='div', attrs={'id':'other_videos'}),
|
||||
dict(name='div', attrs={'class':'dot_line_yellow'}),
|
||||
dict(name='a', attrs={'class':'print'}),
|
||||
dict(name='a', attrs={'class':'email'}),
|
||||
dict(name='a', attrs={'class':'YM'}),
|
||||
dict(name='a', attrs={'class':'comment'}),
|
||||
dict(name='div', attrs={'class':'tombstone_cross'}),
|
||||
dict(name='span', attrs={'class':'liketext'})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':'service_send'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.babyonline.ro/rss_homepage.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
@ -23,7 +23,7 @@ class BigOven(BasicNewsRecipe):
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
@ -36,29 +36,38 @@ class BigOven(BasicNewsRecipe):
|
||||
|
||||
remove_attributes = ['style', 'font']
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':['ppy-caption']})
|
||||
,dict(name='div', attrs={'id':['float_corner']})
|
||||
]
|
||||
def get_article_url(self, article):
|
||||
url = article.get('feedburner_origlink',article.get('link', None))
|
||||
front, middle, end = url.partition('comhttp//www.bigoven.com')
|
||||
url = front + 'com' + end
|
||||
return url
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['nosidebar_main']})]
|
||||
|
||||
remove_tags_after = [dict(name='div', attrs={'class':['display-field']})]
|
||||
|
||||
remove_tags = [dict(name='ul', attrs={'class':['tabs']})]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'Want detailed nutrition information?', re.DOTALL), lambda match: ''),
|
||||
(re.compile('\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
|
||||
tag.replaceWith(tag.string)
|
||||
for tag in soup.findAll(name='a', text=re.compile(r'.*View Metric.*', re.DOTALL)):
|
||||
tag.parent.parent.extract()
|
||||
for tag in soup.findAll(name='a', text=re.compile(r'.*Add my own photo.*', re.DOTALL)):
|
||||
tag.parent.parent.extract()
|
||||
for tag in soup.findAll(name='div', attrs={'class':['container']}):
|
||||
if tag.find(name='h1'):
|
||||
continue
|
||||
if tag.find(name='h2', text=re.compile(r'.*Ingredients.*', re.DOTALL)):
|
||||
print 'tag found Ingred h2'
|
||||
continue
|
||||
if tag.find(name='h2', text=re.compile(r'Preparation.*', re.DOTALL)):
|
||||
print 'tag found Prep h2'
|
||||
continue
|
||||
tag.extract()
|
||||
tag.parent.parent.extract()
|
||||
for tag in soup.findAll(text=re.compile(r'.*Try BigOven Pro for Free.*', re.DOTALL)):
|
||||
tag.extract()
|
||||
for tag in soup.findAll(text=re.compile(r'.*Add my photo of this recipe.*', re.DOTALL)):
|
||||
tag.parent.extract()
|
||||
for tag in soup.findAll(name='a', text=re.compile(r'.*photo contest.*', re.DOTALL)):
|
||||
tag.parent.extract()
|
||||
for tag in soup.findAll(name='a', text='Remove ads'):
|
||||
tag.parent.parent.extract()
|
||||
for tag in soup.findAll(name='ol', attrs={'class':['recipe-tags']}):
|
||||
tag.parent.extract()
|
||||
return soup
|
||||
|
||||
feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]
|
||||
|
||||
feeds = [(u'Recent Raves', u'http://www.bigoven.com/rss/recentraves'),
|
||||
(u'Recipe Of The Day', u'http://feeds.feedburner.com/bigovencom-RecipeOfTheDay')]
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
clarin.com
|
||||
'''
|
||||
@ -18,11 +18,18 @@ class Clarin(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
delay = 1
|
||||
language = 'es_AR'
|
||||
publication_type = 'newspaper'
|
||||
INDEX = 'http://www.clarin.com'
|
||||
masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,serif; font-size: xx-large} .hora{font-weight:bold} .hd p{font-size: small} .nombre-autor{color: #0F325A} '
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif}
|
||||
h2{font-family: Georgia,serif; font-size: xx-large}
|
||||
.hora{font-weight:bold}
|
||||
.hd p{font-size: small}
|
||||
.nombre-autor{color: #0F325A}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -31,7 +38,9 @@ class Clarin(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':['hd','mt']})]
|
||||
keep_only_tags = [dict(attrs={'class':['hd','mt']})]
|
||||
remove_tags = [dict(name=['meta','base','link'])]
|
||||
remove_attributes = ['lang','_mce_bogus']
|
||||
|
||||
feeds = [
|
||||
(u'Pagina principal', u'http://www.clarin.com/rss/' )
|
||||
@ -47,6 +56,10 @@ class Clarin(BasicNewsRecipe):
|
||||
,(u'Ciudades' , u'http://www.clarin.com/rss/ciudades/' )
|
||||
]
|
||||
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?print=1'
|
||||
|
||||
|
@ -61,6 +61,12 @@ class DailyTelegraph(BasicNewsRecipe):
|
||||
(u'Entertainment News', u'http://feeds.news.com.au/public/rss/2.0/dtele_entertainment_news_201.xml'),
|
||||
(u'Lifestyle News', u'http://feeds.news.com.au/public/rss/2.0/dtele_lifestyle_227.xml'),
|
||||
(u'Music', u'http://feeds.news.com.au/public/rss/2.0/dtele_music_441.xml'),
|
||||
(u'Sport',
|
||||
u'http://feeds.news.com.au/public/rss/2.0/dtele_sport_203.xml'),
|
||||
(u'Soccer',
|
||||
u'http://feeds.news.com.au/public/rss/2.0/dtele_sports_soccer_344.xml'),
|
||||
(u'Rugby Union',
|
||||
u'http://feeds.news.com.au/public/rss/2.0/dtele_sports_rugby_union_342.xml'),
|
||||
(u'Property Confidential', u'http://feeds.news.com.au/public/rss/2.0/dtele_property_confidential_463.xml'),
|
||||
(u'Property - Your Space', u'http://feeds.news.com.au/public/rss/2.0/dtele_property_yourspace_462.xml'),
|
||||
(u'Confidential News', u'http://feeds.news.com.au/public/rss/2.0/dtele_entertainment_confidential_252.xml'),
|
||||
|
83
recipes/der_spiegel.recipe
Normal file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Nikolas Mangold <nmangold at gmail.com>'
|
||||
'''
|
||||
spiegel.de
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre import strftime
|
||||
from calibre import re
|
||||
|
||||
class DerSpiegel(BasicNewsRecipe):
|
||||
title = 'Der Spiegel'
|
||||
__author__ = 'Nikolas Mangold'
|
||||
description = 'Der Spiegel, Printed Edition. Access to paid content.'
|
||||
publisher = 'SPIEGEL-VERLAG RUDOLF AUGSTEIN GMBH & CO. KG'
|
||||
category = 'news, politics, Germany'
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
needs_subscription = True
|
||||
remove_empty_feeds = True
|
||||
delay = 1
|
||||
PREFIX = 'http://m.spiegel.de'
|
||||
INDEX = PREFIX + '/spiegel/print/epaper/index-heftaktuell.html'
|
||||
use_embedded_content = False
|
||||
masthead_url = 'http://upload.wikimedia.org/wikipedia/en/thumb/1/17/Der_Spiegel_logo.svg/200px-Der_Spiegel_logo.svg.png'
|
||||
language = 'de'
|
||||
publication_type = 'magazine'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '
|
||||
timefmt = '[%W/%Y]'
|
||||
empty_articles = ['Titelbild']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<p>◆</p>', re.DOTALL|re.IGNORECASE), lambda match: '<hr>'),
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
def has_login_name(form):
|
||||
try:
|
||||
form.find_control(name="f.loginName")
|
||||
except:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.PREFIX + '/meinspiegel/login.html')
|
||||
br.select_form(predicate=has_login_name)
|
||||
br['f.loginName' ] = self.username
|
||||
br['f.password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
remove_tags_before = dict(attrs={'class':'spArticleContent'})
|
||||
remove_tags_after = dict(attrs={'class':'spArticleCredit'})
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
|
||||
cover = soup.find('img', width=248)
|
||||
if cover is not None:
|
||||
self.cover_url = cover['src']
|
||||
|
||||
index = soup.find('dl')
|
||||
|
||||
feeds = []
|
||||
for section in index.findAll('dt'):
|
||||
section_title = self.tag_to_string(section).strip()
|
||||
self.log('Found section ', section_title)
|
||||
|
||||
articles = []
|
||||
for article in section.findNextSiblings(['dd','dt']):
|
||||
if article.name == 'dt':
|
||||
break
|
||||
link = article.find('a')
|
||||
title = self.tag_to_string(link).strip()
|
||||
if title in self.empty_articles:
|
||||
continue
|
||||
self.log('Found article ', title)
|
||||
url = self.PREFIX + link['href']
|
||||
articles.append({'title' : title, 'date' : strftime(self.timefmt), 'url' : url})
|
||||
feeds.append((section_title,articles))
|
||||
return feeds;
|
@ -14,14 +14,14 @@ class EcuisineRo(BasicNewsRecipe):
|
||||
__author__ = u'Silviu Cotoar\u0103'
|
||||
description = u'Reinventeaz\u0103 pl\u0103cerea de a g\u0103ti'
|
||||
publisher = 'eCuisine'
|
||||
oldest_article = 5
|
||||
oldest_article = 50
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Retete,Bucatarie'
|
||||
encoding = 'utf-8'
|
||||
cover_url = ''
|
||||
cover_url = 'http://www.ecuisine.ro/sites/all/themes/ecuisine/images/logo.gif'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
@ -31,8 +31,8 @@ class EcuisineRo(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'page-title'})
|
||||
, dict(name='div', attrs={'class':'content clearfix'})
|
||||
dict(name='h1', attrs={'id':'page-title'})
|
||||
, dict(name='div', attrs={'class':'field-item even'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
|
@ -31,8 +31,8 @@ class EgirlRo(BasicNewsRecipe):
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'title_art'})
|
||||
, dict(name='div', attrs={'class':'content_style'})
|
||||
dict(name='div', attrs={'id':'content_art'})
|
||||
, dict(name='div', attrs={'class':'content_articol'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
@ -53,6 +53,7 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
feeds = [
|
||||
(u'UK' , u'http://www.ft.com/rss/home/uk' )
|
||||
,(u'US' , u'http://www.ft.com/rss/home/us' )
|
||||
,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
|
||||
,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
|
||||
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
|
||||
]
|
||||
|
36
recipes/hallo_assen.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1302341394(BasicNewsRecipe):
|
||||
title = u'Hallo Assen'
|
||||
oldest_article = 180
|
||||
max_articles_per_feed = 100
|
||||
|
||||
__author__ = 'Reijndert'
|
||||
no_stylesheets = True
|
||||
cover_url = 'http://www.halloassen.nl/multimedia/halloassen/archive/00002/HalloAssen_2518a.gif'
|
||||
language = 'nl'
|
||||
country = 'NL'
|
||||
version = 1
|
||||
category = u'Nieuws'
|
||||
timefmt = ' %Y-%m-%d (%a)'
|
||||
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'photoFrame'})
|
||||
,dict(name='div', attrs={'class':'textContent'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'id':'articleLinks'})
|
||||
,dict(name='div',attrs={'class':'categories clearfix'})
|
||||
,dict(name='div',attrs={'id':'rating'})
|
||||
,dict(name='div',attrs={'id':'comments'})
|
||||
]
|
||||
|
||||
feeds = [(u'Ons Nieuws', u'http://feeds.feedburner.com/halloassen/onsnieuws'), (u'Politie', u'http://www.halloassen.nl/rss/?c=37'), (u'Rechtbank', u'http://www.halloassen.nl/rss/?c=39'), (u'Justitie', u'http://www.halloassen.nl/rss/?c=36'), (u'Evenementen', u'http://www.halloassen.nl/rss/?c=34'), (u'Cultuur', u'http://www.halloassen.nl/rss/?c=32'), (u'Politiek', u'http://www.halloassen.nl/rss/?c=38'), (u'Economie', u'http://www.halloassen.nl/rss/?c=33')]
|
||||
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
|
||||
'''
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Handelsblatt(BasicNewsRecipe):
|
||||
@ -7,14 +6,11 @@ class Handelsblatt(BasicNewsRecipe):
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
|
||||
# cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
|
||||
language = 'de'
|
||||
# keep_only_tags = []
|
||||
keep_only_tags = (dict(name = 'div', attrs = {'class': ['hcf-detail-abstract hcf-teaser ajaxify','hcf-detail','hcf-author-wrapper']}))
|
||||
# keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
|
||||
remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})
|
||||
,dict(name='ul' , attrs={'class':['hcf-detail-tools']})
|
||||
]
|
||||
|
||||
remove_tags_before = dict(attrs={'class':'hcf-overline'})
|
||||
remove_tags_after = dict(attrs={'class':'hcf-footer'})
|
||||
|
||||
feeds = [
|
||||
(u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
|
||||
@ -28,17 +24,16 @@ class Handelsblatt(BasicNewsRecipe):
|
||||
(u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
|
||||
(u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
.hcf-headline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:x-large;}
|
||||
.hcf-overline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:x-large;}
|
||||
.hcf-exclusive {font-family:Arial,Helvetica,sans-serif; font-style:italic;font-weight:bold; margin-right:5pt;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;}
|
||||
.hcf-location-mark{font-weight:bold; margin-right:5pt;}
|
||||
.MsoNormal{font-family:Helvetica,Arial,sans-serif;}
|
||||
.hcf-author-wrapper{font-style:italic;}
|
||||
.hcf-article-date{font-size:x-small;}
|
||||
.hcf-caption {font-style:italic;font-size:small;}
|
||||
img {align:left;}
|
||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
||||
'''
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
url = url.split('/')
|
||||
url[-1] = 'v_detail_tab_print,'+url[-1]
|
||||
url = '/'.join(url)
|
||||
return url
|
||||
|
@ -1,9 +1,6 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
# Needed for BLOGs
|
||||
from calibre.web.feeds import Feed
|
||||
|
||||
class HBR(BasicNewsRecipe):
|
||||
|
||||
title = 'Harvard Business Review Blogs'
|
||||
@ -32,6 +29,7 @@ class HBR(BasicNewsRecipe):
|
||||
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
else:
|
||||
timefmt = ' [%B %Y]'
|
||||
|
||||
@ -59,9 +57,9 @@ class HBR(BasicNewsRecipe):
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.LOGIN_URL)
|
||||
br.select_form(name='signInForm')
|
||||
br['signInForm:username'] = self.username
|
||||
br['signInForm:password'] = self.password
|
||||
br.select_form(name='signin-form')
|
||||
br['signin-form:username'] = self.username
|
||||
br['signin-form:password'] = self.password
|
||||
raw = br.submit().read()
|
||||
if 'My Account' not in raw:
|
||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||
@ -161,27 +159,13 @@ class HBR(BasicNewsRecipe):
|
||||
return startDate, endDate
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
def hbr_parse_blogs(self, feeds):
|
||||
# Do the "official" parse_feeds first
|
||||
rssFeeds = Feed()
|
||||
|
||||
# Use the PARSE_FEEDS method to get a Feeds object of the articles
|
||||
rssFeeds = BasicNewsRecipe.parse_feeds(self)
|
||||
|
||||
# Create a new feed of the right configuration and append to existing afeeds
|
||||
self.feed_to_index_append(rssFeeds[:], feeds)
|
||||
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
def parse_index(self):
|
||||
if self.INCLUDE_ARTICLES == True:
|
||||
soup = self.hbr_get_toc()
|
||||
feeds = self.hbr_parse_toc(soup)
|
||||
else:
|
||||
feeds = []
|
||||
|
||||
# blog stuff
|
||||
if self.INCLUDE_BLOGS == True:
|
||||
self.hbr_parse_blogs(feeds)
|
||||
return BasicNewsRecipe.parse_index(self)
|
||||
|
||||
return feeds
|
||||
#-------------------------------------------------------------------------------------------------
|
||||
|
BIN
recipes/icons/babyonline.png
Normal file
After Width: | Height: | Size: 256 B |
BIN
recipes/icons/socialdiva.png
Normal file
After Width: | Height: | Size: 1.0 KiB |
@ -1,71 +1,65 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com> and Edwin van Maastrigt <evanmaastrigt at gmail.com>'
|
||||
__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
|
||||
__author__ = 'Marco Saraceno'
|
||||
__copyright__ = '2010, Marco Saraceno <marcosaraceno at gmail.com>'
|
||||
description = 'Italian daily newspaper - v 1.1 (Mar14,2011)'
|
||||
|
||||
'''
|
||||
http://www.ilsole24ore.com/
|
||||
http://www.ilsole24ore.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class IlSole24Ore(BasicNewsRecipe):
|
||||
__author__ = 'Marco Saraceno'
|
||||
description = 'Italian financial daily newspaper'
|
||||
|
||||
class ilsole24Ore(BasicNewsRecipe):
|
||||
author = 'Lorenzo Vigentini & Edwin van Maastrigt'
|
||||
description = 'Financial news daily paper'
|
||||
|
||||
cover_url = 'http://www.ilsole24ore.com/img2007/print_header.gif'
|
||||
|
||||
title = u'il Sole 24 Ore New'
|
||||
publisher = 'italiaNews'
|
||||
category = 'News, finance, economy, politics'
|
||||
cover_url = 'http://www.shopping24.ilsole24ore.com/ProductRelated/rds/img/logo_sole.gif'
|
||||
title = u'Il Sole 24 Ore'
|
||||
publisher = 'Gruppo editoriale GRUPPO 24ORE'
|
||||
category = 'News, politics, culture, economy, financial, Italian'
|
||||
|
||||
language = 'it'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 50
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['header','titolo']}),
|
||||
dict(name='table', attrs={'class':['footer1024','footerdown']}),
|
||||
]
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('id', article.get('guid', None))
|
||||
link = article.get('link', None)
|
||||
if link is None:
|
||||
return article
|
||||
if link.split('/')[-1]=="story01.htm":
|
||||
link=link.split('/')[-2]
|
||||
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
|
||||
b=['.' ,'/' ,'?' ,'-' ,'=' ,'&' ,'.com','www.','0']
|
||||
for i in range(0,len(a)):
|
||||
link=link.replace(a[i],b[i])
|
||||
link="http://"+link
|
||||
return link
|
||||
|
||||
feeds = [
|
||||
(u'Notizie Italia', u'http://www.ilsole24ore.com/rss/notizie/italia.xml'),
|
||||
(u'Notizie Europa', u'http://www.ilsole24ore.com/rss/notizie/europa.xml'),
|
||||
(u'Notizie USA', u'http://www.ilsole24ore.com/rss/notizie/usa.xml'),
|
||||
(u'Notizie Americhe', u'http://www.ilsole24ore.com/rss/notizie/americhe.xml'),
|
||||
(u'Notizie Medio Oriente e Africa', u'http://www.ilsole24ore.com/rss/notizie/medio-oriente-e-africa.xml'),
|
||||
(u'Notizie Asia e Oceania', u'http://www.ilsole24ore.com/rss/notizie/asia-e-oceania.xml'),
|
||||
(u'Commenti', u'http://www.ilsole24ore.com/rss/commenti-e-idee.xml'),
|
||||
(u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-e-tributi.xml'),
|
||||
(u'Finanza', u'http://www.ilsole24ore.com/rss/finanza-e-mercati.xml'),
|
||||
(u'Economia', u'http://www.ilsole24ore.com/rss/economia.xml'),
|
||||
(u'Tecnologia', u'http://www.ilsole24ore.com/rss/tecnologie.xml'),
|
||||
(u'Cultura', u'http://www.ilsole24ore.com/rss/cultura.xml'),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
link, sep, params = url.rpartition('?')
|
||||
if link is None:
|
||||
return link.replace('_1.php', '_php')
|
||||
return link.replace('.shtml', '_PRN.shtml')
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'txt'})
|
||||
]
|
||||
# remove_tags = [dict(name='br')]
|
||||
|
||||
feeds = [
|
||||
(u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
|
||||
(u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
|
||||
(u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
|
||||
(u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
|
||||
(u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
|
||||
(u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
|
||||
(u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
|
||||
(u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
|
||||
(u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
|
||||
(u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml'),
|
||||
(u'Ambiente e Sicurezza',u'http://www.ilsole24ore.com/rss/prof_as.xml')
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:"Georgia","Times New Roman";}
|
||||
.linkHighlight {color:#0292c6;}
|
||||
.txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px};text-align:justify;font-family:"serif"}
|
||||
.txt p {line-height:18px;}
|
||||
.txt span {line-height:22px;}
|
||||
.title h3 {color:#7b7b7b;}
|
||||
.title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
|
||||
'''
|
||||
return url.replace('.shtml', '_PRN.shtml')
|
||||
|
||||
|
@ -18,7 +18,6 @@ class IrishTimes(BasicNewsRecipe):
|
||||
oldest_article = 1.0
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
simultaneous_downloads= 5
|
||||
|
||||
r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
|
||||
remove_tags = [dict(name='div', attrs={'class':'footer'})]
|
||||
@ -26,17 +25,17 @@ class IrishTimes(BasicNewsRecipe):
|
||||
|
||||
feeds = [
|
||||
('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
|
||||
('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
|
||||
('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
|
||||
('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
|
||||
('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
|
||||
('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
|
||||
('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
|
||||
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
|
||||
('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'),
|
||||
('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'),
|
||||
('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'),
|
||||
('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'),
|
||||
('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'),
|
||||
('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'),
|
||||
('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'),
|
||||
('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
|
||||
('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
|
||||
('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
|
||||
('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
|
||||
('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'),
|
||||
('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'),
|
||||
('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'),
|
||||
('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
|
||||
('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
|
||||
('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
|
||||
@ -57,5 +56,3 @@ class IrishTimes(BasicNewsRecipe):
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.link
|
||||
|
||||
|
||||
|
@ -48,7 +48,7 @@ class LeMonde(BasicNewsRecipe):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
return self.adeify_images(soup)
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'([0-9])%'), lambda m: m.group(1) + ' %'),
|
||||
|
@ -1,4 +1,3 @@
|
||||
import string
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Newsweek(BasicNewsRecipe):
|
||||
@ -11,7 +10,6 @@ class Newsweek(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
|
||||
BASE_URL = 'http://www.newsweek.com'
|
||||
INDEX = BASE_URL+'/topics.html'
|
||||
|
||||
keep_only_tags = dict(name='article', attrs={'class':'article-text'})
|
||||
remove_tags = [dict(attrs={'data-dartad':True})]
|
||||
@ -23,11 +21,14 @@ class Newsweek(BasicNewsRecipe):
|
||||
return soup
|
||||
|
||||
def newsweek_sections(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
for a in soup.findAll('a', title='Primary tag', href=True):
|
||||
yield (string.capitalize(self.tag_to_string(a)),
|
||||
self.BASE_URL+a['href'])
|
||||
|
||||
return [
|
||||
('Nation', 'http://www.newsweek.com/tag/nation.html'),
|
||||
('Society', 'http://www.newsweek.com/tag/society.html'),
|
||||
('Culture', 'http://www.newsweek.com/tag/culture.html'),
|
||||
('World', 'http://www.newsweek.com/tag/world.html'),
|
||||
('Politics', 'http://www.newsweek.com/tag/politics.html'),
|
||||
('Business', 'http://www.newsweek.com/tag/business.html'),
|
||||
]
|
||||
|
||||
def newsweek_parse_section_page(self, soup):
|
||||
for article in soup.findAll('article', about=True,
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
novosti.rs
|
||||
'''
|
||||
@ -21,34 +21,71 @@ class Novosti(BasicNewsRecipe):
|
||||
encoding = 'utf-8'
|
||||
language = 'sr'
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.novosti.rs/images/basic/logo-print.png'
|
||||
extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
.article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
|
||||
.author{font-size: small}
|
||||
.articleLead{font-size: large; font-weight: bold}
|
||||
img{display: block; margin-bottom: 1em; margin-top: 1em}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':['articleTitle','author','articleLead','articleBody']})]
|
||||
remove_tags = [dict(name=['embed','object','iframe','base','link','meta'])]
|
||||
feeds = [(u'Vesti', u'http://www.novosti.rs/rss/rss-vesti')]
|
||||
keep_only_tags = [dict(attrs={'class':['articleTitle','articleInfo','articleLead','singlePhoto fl','articleBody']})]
|
||||
remove_tags = [
|
||||
dict(name=['embed','object','iframe','base','link','meta'])
|
||||
,dict(name='a', attrs={'class':'loadComments topCommentsLink'})
|
||||
]
|
||||
remove_attributes = ['lang','xmlns:fb']
|
||||
|
||||
feeds = [
|
||||
(u'Politika' , u'http://www.novosti.rs/rss/2-Sve%20vesti')
|
||||
,(u'Drustvo' , u'http://www.novosti.rs/rss/1-Sve%20vesti')
|
||||
,(u'Ekonomija' , u'http://www.novosti.rs/rss/3-Sve%20vesti')
|
||||
,(u'Hronika' , u'http://www.novosti.rs/rss/4-Sve%20vesti')
|
||||
,(u'Dosije' , u'http://www.novosti.rs/rss/5-Sve%20vesti')
|
||||
,(u'Reportaze' , u'http://www.novosti.rs/rss/6-Sve%20vesti')
|
||||
,(u'Tehnologije' , u'http://www.novosti.rs/rss/35-Sve%20vesti')
|
||||
,(u'Zanimljivosti', u'http://www.novosti.rs/rss/26-Sve%20vesti')
|
||||
,(u'Auto' , u'http://www.novosti.rs/rss/50-Sve%20vesti')
|
||||
,(u'Sport' , u'http://www.novosti.rs/rss/11|47|12|14|13-Sve%20vesti')
|
||||
,(u'Svet' , u'http://www.novosti.rs/rss/7-Sve%20vesti')
|
||||
,(u'Region' , u'http://www.novosti.rs/rss/8-Sve%20vesti')
|
||||
,(u'Dijaspora' , u'http://www.novosti.rs/rss/9-Sve%20vesti')
|
||||
,(u'Spektakl' , u'http://www.novosti.rs/rss/10-Sve%20vesti')
|
||||
,(u'Kultura' , u'http://www.novosti.rs/rss/31-Sve%20vesti')
|
||||
,(u'Srbija' , u'http://www.novosti.rs/rss/15-Sve%20vesti')
|
||||
,(u'Beograd' , u'http://www.novosti.rs/rss/16-Sve%20vesti')
|
||||
,(u'Zivot+' , u'http://www.novosti.rs/rss/24|33|34|25|20|18|32|19-Sve%20vesti')
|
||||
,(u'Turizam' , u'http://www.novosti.rs/rss/36-Sve%20vesti')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll('span', attrs={'class':'author'}):
|
||||
item.name='p'
|
||||
for item in soup.findAll('a'):
|
||||
limg = item.find('img')
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
if limg:
|
||||
item.name = 'div'
|
||||
item.attrs = []
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
||||
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
nspm.rs
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import NavigableString
|
||||
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
||||
|
||||
class Nspm(BasicNewsRecipe):
|
||||
title = 'Nova srpska politicka misao'
|
||||
@ -21,7 +21,6 @@ class Nspm(BasicNewsRecipe):
|
||||
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
||||
encoding = 'utf-8'
|
||||
language = 'sr'
|
||||
delay = 2
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'magazine'
|
||||
masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
|
||||
@ -29,27 +28,21 @@ class Nspm(BasicNewsRecipe):
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: "Times New Roman", serif1, serif}
|
||||
.article_description{font-family: Arial, sans1, sans-serif}
|
||||
img{margin-top:0.5em; margin-bottom: 0.7em}
|
||||
img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
|
||||
.author{color: #990000; font-weight: bold}
|
||||
.author,.createdate{font-size: 0.9em} """
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'pretty_print' : True
|
||||
}
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
keep_only_tags = [dict(attrs={'id':'jsn-mainbody'})]
|
||||
remove_tags = [
|
||||
dict(name=['link','object','embed','script','meta','base','iframe'])
|
||||
,dict(attrs={'class':'buttonheading'})
|
||||
]
|
||||
remove_tags_before = dict(attrs={'class':'contentheading'})
|
||||
remove_tags_after = dict(attrs={'class':'article_separator'})
|
||||
remove_attributes = ['width','height']
|
||||
remove_tags = [dict(name=['link','script','meta','base','img'])]
|
||||
remove_attributes = ['width','height','lang','xmlns:fb','xmlns:og','vspace','hspace','type','start','size']
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
@ -57,21 +50,67 @@ class Nspm(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
feeds = [
|
||||
(u'Rubrike' , u'http://www.nspm.rs/rubrike/feed/rss.html')
|
||||
,(u'Debate' , u'http://www.nspm.rs/debate/feed/rss.html')
|
||||
,(u'Reci i misli' , u'http://www.nspm.rs/reci-i-misli/feed/rss.html')
|
||||
(u'Rubrike' , u'http://www.nspm.rs/rubrike/feed/rss.html' )
|
||||
,(u'Debate' , u'http://www.nspm.rs/debate/feed/rss.html' )
|
||||
,(u'Reci i misli' , u'http://www.nspm.rs/reci-i-misli/feed/rss.html' )
|
||||
,(u'Samo smeh srbina spasava', u'http://www.nspm.rs/samo-smeh-srbina-spasava/feed/rss.html')
|
||||
,(u'Polemike' , u'http://www.nspm.rs/polemike/feed/rss.html')
|
||||
,(u'Prikazi' , u'http://www.nspm.rs/prikazi/feed/rss.html')
|
||||
,(u'Prenosimo' , u'http://www.nspm.rs/prenosimo/feed/rss.html')
|
||||
,(u'Hronika' , u'http://www.nspm.rs/tabela/hronika/feed/rss.html')
|
||||
,(u'Polemike' , u'http://www.nspm.rs/polemike/feed/rss.html' )
|
||||
,(u'Prikazi' , u'http://www.nspm.rs/prikazi/feed/rss.html' )
|
||||
,(u'Prenosimo' , u'http://www.nspm.rs/prenosimo/feed/rss.html' )
|
||||
,(u'Hronika' , u'http://www.nspm.rs/tabela/hronika/feed/rss.html' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.body.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.body.findAll('h1'):
|
||||
nh = NavigableString(item.a.string)
|
||||
item.a.extract()
|
||||
item.insert(0,nh)
|
||||
return self.adeify_images(soup)
|
||||
atitle = soup.body.find('a',attrs={'class':'contentpagetitle'})
|
||||
if atitle:
|
||||
cleanTitle = Tag(soup,'h1',[('class','contentpagetitle')])
|
||||
cnt = NavigableString(self.tag_to_string(atitle))
|
||||
cleanTitle.append(cnt)
|
||||
|
||||
author = soup.body.find('span',attrs={'class':'author'})
|
||||
if author:
|
||||
author.extract()
|
||||
author.name = 'div'
|
||||
|
||||
crdate = soup.body.find('td',attrs={'class':'createdate'})
|
||||
if crdate:
|
||||
cleanCrdate = Tag(soup,'div',[('class','createdate')])
|
||||
cnt = NavigableString(self.tag_to_string(crdate))
|
||||
cleanCrdate.append(cnt)
|
||||
|
||||
#get the dependant element
|
||||
artText = Tag(soup,'div',[('class','text')])
|
||||
textHolderp = crdate.parent
|
||||
textHolder = textHolderp.nextSibling
|
||||
while textHolder and (not isinstance(textHolder,Tag) or (textHolder.name <> textHolderp.name)):
|
||||
textHolder = textHolder.nextSibling
|
||||
if textHolder.td:
|
||||
artText = textHolder.td
|
||||
artText.name = 'div'
|
||||
artText.attrs = []
|
||||
artText['class'] = 'text'
|
||||
artText.extract()
|
||||
|
||||
soup.body.contents=[]
|
||||
|
||||
soup.body.append(cleanTitle)
|
||||
soup.body.append(author)
|
||||
soup.body.append(cleanCrdate)
|
||||
soup.body.append(artText)
|
||||
|
||||
for item in soup.findAll('a'):
|
||||
limg = item.find('img')
|
||||
if item.string is not None:
|
||||
str = item.string
|
||||
item.replaceWith(str)
|
||||
else:
|
||||
if limg:
|
||||
item.name = 'div'
|
||||
item.attrs = []
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if not item.has_key('alt'):
|
||||
item['alt'] = 'image'
|
||||
return soup
|
||||
|
@ -8,23 +8,36 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net> edited by Huan T'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Slashdot(BasicNewsRecipe):
|
||||
title = u'Slashdot.org'
|
||||
description = '''Tech news. WARNING: This recipe downloads a lot
|
||||
of content and may result in your IP being banned from slashdot.org'''
|
||||
oldest_article = 7
|
||||
simultaneous_downloads = 1
|
||||
delay = 3
|
||||
max_articles_per_feed = 100
|
||||
language = 'en'
|
||||
title = u'Slashdot.org'
|
||||
description = '''Tech news. WARNING: This recipe downloads a lot
|
||||
of content and may result in your IP being banned from slashdot.org'''
|
||||
oldest_article = 7
|
||||
simultaneous_downloads = 1
|
||||
delay = 3
|
||||
max_articles_per_feed = 100
|
||||
language = 'en'
|
||||
|
||||
__author__ = 'floweros edited by Huan T'
|
||||
no_stylesheets = True
|
||||
# keep_only_tags = [
|
||||
# dict(name='div',attrs={'class':'article'}),
|
||||
# dict(name='div',attrs={'class':'commentTop'}),
|
||||
# ]
|
||||
__author__ = 'floweros edited by Huan T'
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [
|
||||
dict(name='div',attrs={'id':'article'}),
|
||||
dict(name='div',attrs={'class':['postBody' 'details']}),
|
||||
dict(name='footer',attrs={'class':['clearfix meta article-foot']}),
|
||||
dict(name='article',attrs={'class':['fhitem fhitem-story article usermode thumbs grid_24']}),
|
||||
dict(name='dl',attrs={'class':'relatedPosts'}),
|
||||
dict(name='h2',attrs={'class':'story'}),
|
||||
dict(name='span',attrs={'class':'comments'}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
|
||||
remove_tags = [
|
||||
dict(name='aside',attrs={'id':'slashboxes'}),
|
||||
dict(name='div',attrs={'class':'paginate'}),
|
||||
dict(name='section',attrs={'id':'comments'}),
|
||||
dict(name='span',attrs={'class':'topic'}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Slashdot',
|
||||
u'http://rss.slashdot.org/Slashdot/slashdot'),
|
||||
(u'/. IT',
|
||||
@ -37,5 +50,3 @@ class Slashdot(BasicNewsRecipe):
|
||||
u'http://rss.slashdot.org/Slashdot/slashdotYourRightsOnline')
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('feedburner_origlink', None)
|
||||
|
54
recipes/socialdiva.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = u'2011'
|
||||
'''
|
||||
socialdiva.ro
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SocialDiva(BasicNewsRecipe):
|
||||
title = u'Social Diva'
|
||||
__author__ = u'Silviu Cotoara'
|
||||
description = u'When in doubt, wear red'
|
||||
publisher = 'Social Diva'
|
||||
oldest_article = 5
|
||||
language = 'ro'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
category = 'Ziare,Reviste,Femei'
|
||||
encoding = 'utf-8'
|
||||
cover_url = 'http://www.socialdiva.ro/images/logo.png'
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher' : publisher
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'col-alpha mt5 content_articol'}),
|
||||
dict(name='div', attrs={'class':'mt5'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='a', attrs={'class':['comments float-left scroll mt5']}),
|
||||
dict(name='a', attrs={'class':['comments float-left scroll']}),
|
||||
dict(name='div', attrs={'class':['rating-container relative float-left']}),
|
||||
dict(name='div', attrs={'class':['float-right social_articol']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='a', attrs={'class':['comments float-left scroll mt5']})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Feeds', u'http://www.socialdiva.ro/rss.html')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
@@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
staradvertiser.com
'''
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Starbulletin(BasicNewsRecipe):
    title = 'Honolulu Star Advertiser'
    __author__ = 'Darko Miletic'
    description = "Latest national and local Hawaii sports news"
    description = 'Latest national and local Hawaii sports news'
    publisher = 'Honolulu Star-Advertiser'
    category = 'news, Honolulu, Hawaii'
    oldest_article = 2
@@ -19,7 +19,13 @@ class Starbulletin(BasicNewsRecipe):
    use_embedded_content = False
    encoding = 'utf8'
    publication_type = 'newspaper'
    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif} h1,.brown,.postCredit{color: #663300} .storyDeck{font-size: 1.2em; font-weight: bold} '
    masthead_url = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'
    extra_css = """
        body{font-family: Verdana,Arial,Helvetica,sans-serif}
        h1,.brown,.postCredit{color: #663300}
        .storyDeck{font-size: 1.2em; font-weight: bold}
        img{display: block}
        """

    conversion_options = {
        'comment' : description
@@ -28,14 +34,16 @@ class Starbulletin(BasicNewsRecipe):
        , 'language' : language
        , 'linearize_tables' : True
    }

    remove_tags_before = dict(attrs={'id':'storyTitle'})
    remove_tags_after = dict(name='div',attrs={'class':'storytext'})
    keep_only_tags = [
        dict(attrs={'id':'storyTitle'})
        ,dict(attrs={'class':['storyDeck','postCredit']})
        ,dict(name='span',attrs={'class':'brown'})
        ,dict(name='div',attrs={'class':'storytext'})
    ]
    remove_tags = [
        dict(name=['object','link','script','span'])
        ,dict(attrs={'class':'insideStoryImage'})
        dict(name=['object','link','script','span','meta','base','iframe'])
        ,dict(attrs={'class':['insideStoryImage','insideStoryAd']})
        ,dict(attrs={'name':'fb_share'})
        ,dict(name='div',attrs={'class':'storytext'})
    ]

    feeds = [
@@ -47,3 +55,24 @@ class Starbulletin(BasicNewsRecipe):
        ,(u'Business' , u'http://www.staradvertiser.com/business/index.rss' )
        ,(u'Travel' , u'http://www.staradvertiser.com/travel/index.rss' )
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
                item['alt'] = 'image'
        return soup
@@ -1,4 +1,3 @@

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
@@ -19,11 +18,11 @@ class SueddeutcheZeitung(BasicNewsRecipe):
    encoding = 'cp1252'
    needs_subscription = True
    remove_empty_feeds = True
    delay = 2
    delay = 1
    PREFIX = 'http://www.sueddeutsche.de'
    INDEX = PREFIX + '/app/epaper/textversion/'
    use_embedded_content = False
    masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/logo.gif'
    masthead_url = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif'
    language = 'de'
    publication_type = 'newspaper'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif} '
@@ -36,7 +35,7 @@ class SueddeutcheZeitung(BasicNewsRecipe):
        , 'linearize_tables' : True
    }

    remove_attributes = ['height','width']
    remove_attributes = ['height','width','style']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@@ -50,24 +49,37 @@ class SueddeutcheZeitung(BasicNewsRecipe):

    remove_tags = [
        dict(attrs={'class':'hidePrint'})
        ,dict(name=['link','object','embed','base','iframe'])
        ,dict(name=['link','object','embed','base','iframe','br'])
    ]
    keep_only_tags = [dict(attrs={'class':'artikelBox'})]
    remove_tags_before = dict(attrs={'class':'artikelTitel'})
    remove_tags_after = dict(attrs={'class':'author'})

    feeds = [
        (u'Politik' , INDEX + 'Politik/' )
        ,(u'Seite drei' , INDEX + 'Seite+drei/' )
        ,(u'Meinungsseite', INDEX + 'Meinungsseite/')
        ,(u'Wissen' , INDEX + 'Wissen/' )
        ,(u'Panorama' , INDEX + 'Panorama/' )
        ,(u'Feuilleton' , INDEX + 'Feuilleton/' )
        ,(u'Medien' , INDEX + 'Medien/' )
        ,(u'Wirtschaft' , INDEX + 'Wirtschaft/' )
        ,(u'Sport' , INDEX + 'Sport/' )
        ,(u'Bayern' , INDEX + 'Bayern/' )
        ,(u'Muenchen' , INDEX + 'M%FCnchen/' )
        (u'Politik' , INDEX + 'Politik/' )
        ,(u'Seite drei' , INDEX + 'Seite+drei/' )
        ,(u'Meinungsseite' , INDEX + 'Meinungsseite/')
        ,(u'Wissen' , INDEX + 'Wissen/' )
        ,(u'Panorama' , INDEX + 'Panorama/' )
        ,(u'Feuilleton' , INDEX + 'Feuilleton/' )
        ,(u'Medien' , INDEX + 'Medien/' )
        ,(u'Wirtschaft' , INDEX + 'Wirtschaft/' )
        ,(u'Sport' , INDEX + 'Sport/' )
        ,(u'Bayern' , INDEX + 'Bayern/' )
        ,(u'Muenchen' , INDEX + 'M%FCnchen/' )
        ,(u'Muenchen City' , INDEX + 'M%FCnchen+City/' )
        ,(u'Jetzt.de' , INDEX + 'Jetzt.de/' )
        ,(u'Reise' , INDEX + 'Reise/' )
        ,(u'SZ Extra' , INDEX + 'SZ+Extra/' )
        ,(u'Wochenende' , INDEX + 'SZ+am+Wochenende/' )
        ,(u'Stellen-Markt' , INDEX + 'Stellen-Markt/')
        ,(u'Motormarkt' , INDEX + 'Motormarkt/')
        ,(u'Immobilien-Markt', INDEX + 'Immobilien-Markt/')
        ,(u'Thema' , INDEX + 'Thema/' )
        ,(u'Forum' , INDEX + 'Forum/' )
        ,(u'Leute' , INDEX + 'Leute/' )
        ,(u'Jugend' , INDEX + 'Jugend/' )
        ,(u'Beilage' , INDEX + 'Beilage/' )
    ]

    def parse_index(self):
@@ -24,30 +24,31 @@ class TabuRo(BasicNewsRecipe):
    cover_url = 'http://www.tabu.ro/img/tabu-logo2.png'

    conversion_options = {
            'comments' : description
            ,'tags' : category
            ,'language' : language
            ,'publisher' : publisher
    }
        'comments' : description
        ,'tags' : category
        ,'language' : language
        ,'publisher' : publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id':'Article'}),
    ]
        dict(name='h2', attrs={'class':'articol_titlu'}),
        dict(name='div', attrs={'class':'poza_articol_featured'}),
        dict(name='div', attrs={'class':'articol_text'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':['advertisementArticle']}),
        dict(name='div', attrs={'class':'voting_number'}),
        dict(name='div', attrs={'id':'number_votes'}),
        dict(name='div', attrs={'id':'rating_one'}),
        dict(name='div', attrs={'class':'float: right;'})
        dict(name='div', attrs={'class':'asemanatoare'}),
        dict(name='div', attrs={'class':'social'})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':'social'}),
        dict(name='div', attrs={'id':'comments'}),
    ]
        dict(name='div', attrs={'class':'asemanatoare'})
    ]

    feeds = [
        (u'Feeds', u'http://www.tabu.ro/rss_all.xml')
        (u'Feeds', u'http://www.tabu.ro/feed/')
    ]

    def preprocess_html(self, soup):
recipes/the_journal.recipe (new file, 26 lines)
@@ -0,0 +1,26 @@
__license__ = 'GPL v3'
__copyright__ = '2011 Phil Burns'
'''
TheJournal.ie
'''

from calibre.web.feeds.news import BasicNewsRecipe

class TheJournal(BasicNewsRecipe):

    __author__ = 'Phil Burns'
    title = u'TheJournal.ie'
    oldest_article = 1
    max_articles_per_feed = 100
    encoding = 'utf8'
    language = 'en_IE'
    timefmt = ' (%A, %B %d, %Y)'

    no_stylesheets = True
    remove_tags = [dict(name='div', attrs={'class':'footer'}),
                   dict(name=['script', 'noscript'])]

    extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em }'

    feeds = [
        (u'Latest News', u'http://www.thejournal.ie/feed/')]
@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description = 'TheMarker Financial News in Hebrew'
    __author__ = 'TonyTheBookworm, Marbs'
    __author__ = 'Marbs'
    cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
    title = u'TheMarker'
    language = 'he'
@@ -11,42 +11,38 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']}) ]
    max_articles_per_feed = 10
    keep_only_tags = dict(name='div', attrs={'id':'content'})
    remove_attributes = ['width','float','margin-left']
    no_stylesheets = True
    remove_tags = [dict(name='div', attrs={'class':['social-nav article-social-nav','prsnlArticleEnvelope','cb']}) ,
                   dict(name='a', attrs={'href':['/misc/mobile']}) ,
                   dict(name='span', attrs={'class':['post-summ']}) ]
    max_articles_per_feed = 100
    extra_css = 'body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
    feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
             (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
             (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
             (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
             (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
             (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
             (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
             (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
             (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
             (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
             (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
    feeds = [(u'Head Lines', u'http://www.themarker.com/cmlink/1.144'),
             (u'TA Market', u'http://www.themarker.com/cmlink/1.243'),
             (u'Real Estate', u'http://www.themarker.com/cmlink/1.605656'),
             (u'Global', u'http://www.themarker.com/cmlink/1.605658'),
             (u'Wall Street', u'http://www.themarker.com/cmlink/1.613713'),
             (u'SmartPhone', u'http://www.themarker.com/cmlink/1.605661'),
             (u'Law', u'http://www.themarker.com/cmlink/1.605664'),
             (u'Media', u'http://www.themarker.com/cmlink/1.605660'),
             (u'Consumer', u'http://www.themarker.com/cmlink/1.605662'),
             (u'Career', u'http://www.themarker.com/cmlink/1.605665'),
             (u'Car', u'http://www.themarker.com/cmlink/1.605663'),
             (u'High Tech', u'http://www.themarker.com/cmlink/1.605659'),
             (u'Small Business', u'http://www.themarker.com/cmlink/1.605666')]

    def print_version(self, url):
        split1 = url.split("=")
        weblinks = url
        #split1 = url.split("/")
        #print_url='http://www.themarker.com/misc/article-print-page/'+split1[-1]
        txt = url

        if weblinks is not None:
            for link in weblinks:
                #---------------------------------------------------------
                # here we need some help with some regular expressions:
                # we are trying to find it.themarker.com in a url
                #---------------------------------------------------------
                re1 = '.*?' # Non-greedy match on filler
                re2 = '(it\\.themarker\\.com)' # Fully Qualified Domain Name 1
                rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
                m = rg.search(url)
        re1 = '.*?' # Non-greedy match on filler
        re2 = '(tv)' # Word 1

                if m:
                    split2 = url.split("article/")
                    print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1]

                else:
                    print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1]+'.xml'

                return print_url
        rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
        m = rg.search(txt)
        if m:
            #print 'bad link'
            return 1
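The comment block in print_version above asks for help with the regular expression; all it actually does is detect article links hosted on the it.themarker.com subdomain. A standalone illustration of that match (the URLs below are hypothetical examples, not taken from the recipe):

    import re
    # Same pattern as in the recipe: non-greedy filler, then the subdomain.
    rg = re.compile('.*?' + '(it\\.themarker\\.com)', re.IGNORECASE | re.DOTALL)
    print bool(rg.search('http://it.themarker.com/tmit/article/123'))  # True
    print bool(rg.search('http://www.themarker.com/cmlink/1.144'))     # False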
recipes/volksrant_sub.recipe (new file, 115 lines)
@@ -0,0 +1,115 @@
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Volkskrant_full(BasicNewsRecipe):
    # This recipe downloads the Volkskrant newspaper
    # from the subscribers' site. It requires a password.
    # Known issues: articles that are spread out over
    # multiple pages will appear multiple times, and pages
    # that contain only adverts will appear, but empty.
    # The supplement 'Volkskrant Magazine' on Saturday
    # is currently not downloaded.
    # You can set a manual date to download an archived
    # newspaper. Volkskrant stores over a month at the
    # moment of writing. To do so, uncomment the date in
    # the title line below and follow the instructions
    # marked further below.

    title = 'De Volkskrant (subscription)' # [za, 13 nov 2010]'
    __author__ = u'Selcal'
    description = u"Volkskrant"
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'nl'
    use_embedded_content = False
    simultaneous_downloads = 1
    delay = 1
    needs_subscription = True
    # Set RETRIEVEDATE to 'yyyymmdd' to load an older
    # edition; otherwise keep '%Y%m%d'.
    # When setting a manual date, add the date to the
    # title above and uncomment the timefmt line to stop
    # calibre from adding today's date as well.

    # timefmt = ''
    RETRIEVEDATE = strftime('%Y%m%d')
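    # A concrete example of the manual-date setup described above, using the
    # archived edition of 13 November 2010 mentioned in the title comment
    # (illustrative only; any date within the archive window works):
    #   title = 'De Volkskrant (subscription) [za, 13 nov 2010]'
    #   timefmt = ''
    #   RETRIEVEDATE = '20101113'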
    INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text'
    INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/'
    LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do'
    remove_tags = [dict(name='address')]
    cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()

        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr = 0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        krant = []
        def strip_title(_title):
            # Return everything up to the first ':' in the title,
            # checking the index before dereferencing it.
            i = 0
            while ((i < len(_title)) and (_title[i] <> ":")):
                i = i + 1
            return(_title[0:i])
        for temp in range (5):
            try:
                soup = self.index_to_soup(self.INDEX_MAIN)
                break
            except:
                #print '(Retrying main index load)'
                continue
        mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
        for option in mainsoup.findAll('option'):
            articles = []
            _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
            _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
            #print ''
            #print '<------- Processing section: ' + _INDEX + ' ------------------------->'
            for temp in range (5):
                try:
                    soup = self.index_to_soup(_INDEX)
                    break
                except:
                    #print '(Retrying index load)'
                    continue
            for item in soup.findAll('area'):
                art_nr = item['class']
                attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)]
                #print '==> Found: ' + attrname
                index_title = soup.find('div', attrs={'class': attrname})
                get_title = index_title['title']
                _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text'
                title = get_title
                #print '--> Title: ' + title
                #print '--> URL: ' + _ARTICLE
                for temp in range (5):
                    try:
                        souparticle = self.index_to_soup(_ARTICLE)
                        break
                    except:
                        print '(Retrying URL load)'
                        continue
                headerurl = souparticle.findAll('frame')[0]['src']
                #print '--> Read frame name for header: ' + headerurl
                url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html'
                #print '--> Corrected URL: ' + url
                if (get_title <> ''):
                    title = strip_title(get_title)
                    date = strftime(' %B %Y')
                if (title <> ''):
                    articles.append({
                        'title' :title
                        ,'date' :date
                        ,'url' :url
                        ,'description':''
                    })
            krant.append( (option.string, articles))
        return krant
@@ -3,7 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado'
__version__ = 'v0.05'
__date__ = '9, February 2011'
__date__ = '13, April 2011'
'''
http://www.weblogssl.com/
'''
@@ -19,7 +19,7 @@ class weblogssl(BasicNewsRecipe):
    category = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1.5
    oldest_article = 1
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
@@ -28,50 +28,52 @@ class weblogssl(BasicNewsRecipe):
    no_stylesheets = True

    # If you do not want to fetch all of the blogs, you can skip any of them
    # by putting a # character in front of its entry, i.e.
    # # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
    # would stop Applesfera from being downloaded. NOTE: the last feed must
    # not end with a trailing comma.
    # # ,(u'Applesfera', u'http://feeds.weblogssl.com/applesfera')
    # would stop Applesfera from being downloaded.

    feeds = [
        (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
        (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
        (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
        (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
        (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
        (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
        (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
        (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
        (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
        (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
        (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
        (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
        (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
        (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
        (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
        (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
        (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
        (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
        (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
        (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
        (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
        (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
        (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
        (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
        (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
        (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
        (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
        (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
        (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
        (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
        (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
        (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
        (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
        (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
        (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
        (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
        (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
        (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
        (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
        (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
        (u'Xataka', u'http://feeds.weblogssl.com/xataka2')
        ,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
        ,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
        ,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
        ,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon')
        ,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
        ,(u'Genbeta', u'http://feeds.weblogssl.com/genbeta')
        ,(u'Genbeta Dev', u'http://feeds.weblogssl.com/genbetadev')
        ,(u'Applesfera', u'http://feeds.weblogssl.com/applesfera')
        ,(u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra')
        ,(u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred')
        ,(u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine')
        ,(u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2')
        ,(u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica')
        ,(u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero')
        ,(u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco')
        ,(u'Pop rosa', u'http://feeds.weblogssl.com/poprosa')
        ,(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom')
        ,(u'Fandemia', u'http://feeds.weblogssl.com/fandemia')
        ,(u'Noctamina', u'http://feeds.weblogssl.com/noctamina')
        ,(u'Tendencias', u'http://feeds.weblogssl.com/trendencias')
        ,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
        ,(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar')
        ,(u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion')
        ,(u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera')
        ,(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia')
        ,(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica')
        ,(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg')
        ,(u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora')
        ,(u'Mensencia', u'http://feeds.weblogssl.com/mensencia')
        ,(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas')
        ,(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion')
        ,(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1')
        ,(u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto')
        ,(u'Motorpasi\xf3n Futuro', u'http://feeds.weblogssl.com/motorpasionfuturo')
        ,(u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol')
        ,(u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites')
        ,(u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar')
        ,(u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2')
        ,(u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos')
        ,(u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme')
        ,(u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
    ]

@@ -102,3 +104,4 @@ class weblogssl(BasicNewsRecipe):
            video_yt['src'] = fuente3 + '/0.jpg'

        return soup
@@ -81,6 +81,11 @@ class WallStreetJournal(BasicNewsRecipe):
            feeds.append((title, articles))
        return feeds

    def abs_wsj_url(self, href):
        # Make a site-relative link absolute; leave absolute links untouched.
        if not href.startswith('http'):
            href = 'http://online.wsj.com' + href
        return href

    def parse_index(self):
        soup = self.wsj_get_index()

@@ -99,14 +104,14 @@ class WallStreetJournal(BasicNewsRecipe):
            pageone = a['href'].endswith('pageone')
            if pageone:
                title = 'Front Section'
                url = 'http://online.wsj.com' + a['href']
                url = self.abs_wsj_url(a['href'])
                feeds = self.wsj_add_feed(feeds,title,url)
                title = "What's News"
                url = url.replace('pageone','whatsnews')
                feeds = self.wsj_add_feed(feeds,title,url)
            else:
                title = self.tag_to_string(a)
                url = 'http://online.wsj.com' + a['href']
                url = self.abs_wsj_url(a['href'])
                feeds = self.wsj_add_feed(feeds,title,url)
        return feeds

@@ -163,7 +168,7 @@ class WallStreetJournal(BasicNewsRecipe):
                title = self.tag_to_string(a).strip() + ' [%s]'%meta
            else:
                title = self.tag_to_string(a).strip()
            url = 'http://online.wsj.com'+a['href']
            url = self.abs_wsj_url(a['href'])
            desc = ''
            for p in container.findAll('p'):
                desc = self.tag_to_string(p)
@@ -48,7 +48,7 @@ authors_completer_append_separator = False
# When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'Manage authors', and pressing 'Recalculate all author sort values'.
author_sort_copy_method = 'invert'
author_sort_copy_method = 'comma'
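# An illustrative example of the difference (assuming the usual semantics of
# these methods): 'invert' turns "Isaac Asimov" into "Asimov, Isaac", while
# 'comma' does the same but leaves a name that already contains a comma,
# such as "Asimov, Isaac", unchanged instead of inverting it again.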

#: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors,
@@ -118,6 +118,7 @@ sort_columns_at_startup = None
# timestamp default if not set: dd MMM yyyy
gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'
gui_last_modified_display_format = 'dd MMM yyyy'

#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
@@ -266,26 +267,6 @@ max_content_server_tags_shown=5
content_server_will_display = ['*']
content_server_wont_display = []

#: Set custom metadata fields that the book details panel will or will not display.
# book_details_will_display is a list of custom fields to be displayed.
# book_details_wont_display is a list of custom fields not to be displayed.
# wont_display has priority over will_display.
# The special value '*' means all custom fields. The value [] means no entries.
# Defaults:
#    book_details_will_display = ['*']
#    book_details_wont_display = []
# Examples:
# To display only the custom fields #mytags and #genre:
#    book_details_will_display = ['#mytags', '#genre']
#    book_details_wont_display = []
# To display all fields except #mycomments:
#    book_details_will_display = ['*']
#    book_details_wont_display = ['#mycomments']
# As above, this tweak affects only the display of custom fields. The standard
# fields are not affected.
book_details_will_display = ['*']
book_details_wont_display = []

#: Set the maximum number of sort 'levels'
# Set the maximum number of sort 'levels' that calibre will use to resort the
# library after certain operations such as searches or device insertion. Each
BIN resources/images/drm-locked.png (new file, 1.6 KiB)
BIN resources/images/drm-unlocked.png (new file, 1.6 KiB)
BIN resources/images/highlight_only_off.png (new file, 810 B)
BIN resources/images/highlight_only_on.png (new file, 396 B)
BIN resources/images/identifiers.png (new file, 705 B; replaces a 6.3 KiB image)
@@ -7,17 +7,30 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
    title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE,
    sort TEXT COLLATE NOCASE,
    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    uri TEXT,
    series_index INTEGER NOT NULL DEFAULT 1,
    pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    series_index REAL NOT NULL DEFAULT 1.0,
    author_sort TEXT COLLATE NOCASE,
    isbn TEXT DEFAULT "" COLLATE NOCASE,
    path TEXT NOT NULL DEFAULT ""
);
    lccn TEXT DEFAULT "" COLLATE NOCASE,
    path TEXT NOT NULL DEFAULT "",
    flags INTEGER NOT NULL DEFAULT 1
    , uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY,
    book INTEGER NOT NULL,
    author INTEGER NOT NULL,
    UNIQUE(book, author)
);
CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY,
    book INTEGER NOT NULL,
    lang_code INTEGER NOT NULL,
    item_order INTEGER NOT NULL DEFAULT 0,
    UNIQUE(book, lang_code)
);
CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY,
    book INTEGER NON NULL,
    name TEXT NON NULL,
    val TEXT NON NULL,
    UNIQUE(book,name));
CREATE TABLE books_publishers_link ( id INTEGER PRIMARY KEY,
    book INTEGER NOT NULL,
    publisher INTEGER NOT NULL,
@@ -49,11 +62,51 @@ CREATE TABLE conversion_options ( id INTEGER PRIMARY KEY,
    data BLOB NOT NULL,
    UNIQUE(format,book)
);
CREATE TABLE custom_columns (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    label TEXT NOT NULL,
    name TEXT NOT NULL,
    datatype TEXT NOT NULL,
    mark_for_delete BOOL DEFAULT 0 NOT NULL,
    editable BOOL DEFAULT 1 NOT NULL,
    display TEXT DEFAULT "{}" NOT NULL,
    is_multiple BOOL DEFAULT 0 NOT NULL,
    normalized BOOL NOT NULL,
    UNIQUE(label)
);
CREATE TABLE data ( id INTEGER PRIMARY KEY,
    book INTEGER NON NULL,
    format TEXT NON NULL COLLATE NOCASE,
    uncompressed_size INTEGER NON NULL,
    name TEXT NON NULL,
    UNIQUE(book, format)
);
CREATE TABLE feeds ( id INTEGER PRIMARY KEY,
    title TEXT NOT NULL,
    script TEXT NOT NULL,
    UNIQUE(title)
);
CREATE TABLE identifiers ( id INTEGER PRIMARY KEY,
    book INTEGER NON NULL,
    type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE,
    val TEXT NON NULL COLLATE NOCASE,
    UNIQUE(book, type)
);
CREATE TABLE languages ( id INTEGER PRIMARY KEY,
    lang_code TEXT NON NULL COLLATE NOCASE,
    UNIQUE(lang_code)
);
CREATE TABLE library_id ( id INTEGER PRIMARY KEY,
    uuid TEXT NOT NULL,
    UNIQUE(uuid)
);
CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY,
    book INTEGER NOT NULL,
    UNIQUE(book));
CREATE TABLE preferences(id INTEGER PRIMARY KEY,
    key TEXT NON NULL,
    val TEXT NON NULL,
    UNIQUE(key));
CREATE TABLE publishers ( id INTEGER PRIMARY KEY,
    name TEXT NOT NULL COLLATE NOCASE,
    sort TEXT COLLATE NOCASE,
@@ -72,34 +125,143 @@ CREATE TABLE tags ( id INTEGER PRIMARY KEY,
    name TEXT NOT NULL COLLATE NOCASE,
    UNIQUE (name)
);
CREATE TABLE data ( id INTEGER PRIMARY KEY,
    book INTEGER NON NULL,
    format TEXT NON NULL COLLATE NOCASE,
    uncompressed_size INTEGER NON NULL,
    name TEXT NON NULL,
    UNIQUE(book, format)
);

CREATE VIEW meta AS
    SELECT id, title,
        (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
        (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
        (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
        timestamp,
        (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
        (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
        (SELECT text FROM comments WHERE book=books.id) comments,
        (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
        series_index,
        sort,
        author_sort,
        (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
        isbn
    FROM books;
    SELECT id, title,
        (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
        (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
        (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
        timestamp,
        (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
        (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
        (SELECT text FROM comments WHERE book=books.id) comments,
        (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
        series_index,
        sort,
        author_sort,
        (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
        isbn,
        path,
        lccn,
        pubdate,
        flags,
        uuid
    FROM books;
CREATE VIEW tag_browser_authors AS SELECT
    id,
    name,
    (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) count,
    (SELECT AVG(ratings.rating)
        FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.author=authors.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
    sort AS sort
FROM authors;
CREATE VIEW tag_browser_filtered_authors AS SELECT
    id,
    name,
    (SELECT COUNT(books_authors_link.id) FROM books_authors_link WHERE
        author=authors.id AND books_list_filter(book)) count,
    (SELECT AVG(ratings.rating)
        FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.author=authors.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0 AND
        books_list_filter(bl.book)) avg_rating,
    sort AS sort
FROM authors;
CREATE VIEW tag_browser_filtered_publishers AS SELECT
    id,
    name,
    (SELECT COUNT(books_publishers_link.id) FROM books_publishers_link WHERE
        publisher=publishers.id AND books_list_filter(book)) count,
    (SELECT AVG(ratings.rating)
        FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0 AND
        books_list_filter(bl.book)) avg_rating,
    name AS sort
FROM publishers;
CREATE VIEW tag_browser_filtered_ratings AS SELECT
    id,
    rating,
    (SELECT COUNT(books_ratings_link.id) FROM books_ratings_link WHERE
        rating=ratings.id AND books_list_filter(book)) count,
    (SELECT AVG(ratings.rating)
        FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.rating=ratings.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0 AND
        books_list_filter(bl.book)) avg_rating,
    rating AS sort
FROM ratings;
CREATE VIEW tag_browser_filtered_series AS SELECT
    id,
    name,
    (SELECT COUNT(books_series_link.id) FROM books_series_link WHERE
        series=series.id AND books_list_filter(book)) count,
    (SELECT AVG(ratings.rating)
        FROM books_series_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.series=series.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0 AND
        books_list_filter(bl.book)) avg_rating,
    (title_sort(name)) AS sort
FROM series;
CREATE VIEW tag_browser_filtered_tags AS SELECT
    id,
    name,
    (SELECT COUNT(books_tags_link.id) FROM books_tags_link WHERE
        tag=tags.id AND books_list_filter(book)) count,
    (SELECT AVG(ratings.rating)
        FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.tag=tags.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0 AND
        books_list_filter(bl.book)) avg_rating,
    name AS sort
FROM tags;
CREATE VIEW tag_browser_publishers AS SELECT
    id,
    name,
    (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) count,
    (SELECT AVG(ratings.rating)
        FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
    name AS sort
FROM publishers;
CREATE VIEW tag_browser_ratings AS SELECT
    id,
    rating,
    (SELECT COUNT(id) FROM books_ratings_link WHERE rating=ratings.id) count,
    (SELECT AVG(ratings.rating)
        FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.rating=ratings.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
    rating AS sort
FROM ratings;
CREATE VIEW tag_browser_series AS SELECT
    id,
    name,
    (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) count,
    (SELECT AVG(ratings.rating)
        FROM books_series_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.series=series.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
    (title_sort(name)) AS sort
FROM series;
CREATE VIEW tag_browser_tags AS SELECT
    id,
    name,
    (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) count,
    (SELECT AVG(ratings.rating)
        FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
        WHERE tl.tag=tags.id AND bl.book=tl.book AND
        ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
    name AS sort
FROM tags;
CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE);
CREATE INDEX books_authors_link_aidx ON books_authors_link (author);
CREATE INDEX books_authors_link_bidx ON books_authors_link (book);
CREATE INDEX books_idx ON books (sort COLLATE NOCASE);
CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code);
CREATE INDEX books_languages_link_bidx ON books_languages_link (book);
CREATE INDEX books_publishers_link_aidx ON books_publishers_link (publisher);
CREATE INDEX books_publishers_link_bidx ON books_publishers_link (book);
CREATE INDEX books_ratings_link_aidx ON books_ratings_link (rating);
@@ -111,32 +273,38 @@ CREATE INDEX books_tags_link_bidx ON books_tags_link (book);
CREATE INDEX comments_idx ON comments (book);
CREATE INDEX conversion_options_idx_a ON conversion_options (format COLLATE NOCASE);
CREATE INDEX conversion_options_idx_b ON conversion_options (book);
CREATE INDEX custom_columns_idx ON custom_columns (label);
CREATE INDEX data_idx ON data (book);
CREATE INDEX formats_idx ON data (format);
CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE);
CREATE INDEX publishers_idx ON publishers (name COLLATE NOCASE);
CREATE INDEX series_idx ON series (sort COLLATE NOCASE);
CREATE INDEX series_idx ON series (name COLLATE NOCASE);
CREATE INDEX tags_idx ON tags (name COLLATE NOCASE);
CREATE TRIGGER books_delete_trg
    AFTER DELETE ON books
    BEGIN
        DELETE FROM books_authors_link WHERE book=OLD.id;
        DELETE FROM books_publishers_link WHERE book=OLD.id;
        DELETE FROM books_ratings_link WHERE book=OLD.id;
        DELETE FROM books_series_link WHERE book=OLD.id;
        DELETE FROM books_tags_link WHERE book=OLD.id;
        DELETE FROM data WHERE book=OLD.id;
        DELETE FROM comments WHERE book=OLD.id;
        DELETE FROM conversion_options WHERE book=OLD.id;
    AFTER DELETE ON books
    BEGIN
        DELETE FROM books_authors_link WHERE book=OLD.id;
        DELETE FROM books_publishers_link WHERE book=OLD.id;
        DELETE FROM books_ratings_link WHERE book=OLD.id;
        DELETE FROM books_series_link WHERE book=OLD.id;
        DELETE FROM books_tags_link WHERE book=OLD.id;
        DELETE FROM books_languages_link WHERE book=OLD.id;
        DELETE FROM data WHERE book=OLD.id;
        DELETE FROM comments WHERE book=OLD.id;
        DELETE FROM conversion_options WHERE book=OLD.id;
        DELETE FROM books_plugin_data WHERE book=OLD.id;
        DELETE FROM identifiers WHERE book=OLD.id;
    END;
CREATE TRIGGER books_insert_trg
    AFTER INSERT ON books
CREATE TRIGGER books_insert_trg AFTER INSERT ON books
    BEGIN
        UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
        UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id;
    END;
CREATE TRIGGER books_update_trg
    AFTER UPDATE ON books
    BEGIN
        UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
    END;
    AFTER UPDATE ON books
    BEGIN
        UPDATE books SET sort=title_sort(NEW.title)
        WHERE id=NEW.id AND OLD.title <> NEW.title;
    END;
CREATE TRIGGER fkc_comments_insert
    BEFORE INSERT ON comments
    BEGIN
@@ -169,23 +337,41 @@ CREATE TRIGGER fkc_data_update
        THEN RAISE(ABORT, 'Foreign key violation: book not in books')
        END;
    END;
CREATE TRIGGER fkc_delete_books_authors_link
CREATE TRIGGER fkc_delete_on_authors
    BEFORE DELETE ON authors
    BEGIN
        SELECT CASE
            WHEN (SELECT COUNT(id) FROM books_authors_link WHERE book=OLD.book) > 0
            THEN RAISE(ABORT, 'Foreign key violation: author is still referenced')
            WHEN (SELECT COUNT(id) FROM books_authors_link WHERE author=OLD.id) > 0
            THEN RAISE(ABORT, 'Foreign key violation: authors is still referenced')
        END;
    END;
CREATE TRIGGER fkc_delete_books_publishers_link
CREATE TRIGGER fkc_delete_on_languages
    BEFORE DELETE ON languages
    BEGIN
        SELECT CASE
            WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0
            THEN RAISE(ABORT, 'Foreign key violation: language is still referenced')
        END;
    END;
CREATE TRIGGER fkc_delete_on_languages_link
    BEFORE INSERT ON books_languages_link
    BEGIN
        SELECT CASE
            WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
            THEN RAISE(ABORT, 'Foreign key violation: book not in books')
            WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
            THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
        END;
    END;
CREATE TRIGGER fkc_delete_on_publishers
    BEFORE DELETE ON publishers
    BEGIN
        SELECT CASE
            WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE book=OLD.book) > 0
            THEN RAISE(ABORT, 'Foreign key violation: publisher is still referenced')
            WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=OLD.id) > 0
            THEN RAISE(ABORT, 'Foreign key violation: publishers is still referenced')
        END;
    END;
CREATE TRIGGER fkc_delete_books_series_link
CREATE TRIGGER fkc_delete_on_series
    BEFORE DELETE ON series
    BEGIN
        SELECT CASE
@@ -193,12 +379,12 @@ CREATE TRIGGER fkc_delete_books_series_link
            THEN RAISE(ABORT, 'Foreign key violation: series is still referenced')
        END;
    END;
CREATE TRIGGER fkc_delete_books_tags_link
CREATE TRIGGER fkc_delete_on_tags
    BEFORE DELETE ON tags
    BEGIN
        SELECT CASE
            WHEN (SELECT COUNT(id) FROM books_tags_link WHERE tag=OLD.id) > 0
            THEN RAISE(ABORT, 'Foreign key violation: tag is still referenced')
            THEN RAISE(ABORT, 'Foreign key violation: tags is still referenced')
        END;
    END;
CREATE TRIGGER fkc_insert_books_authors_link
@@ -267,6 +453,22 @@ CREATE TRIGGER fkc_update_books_authors_link_b
        THEN RAISE(ABORT, 'Foreign key violation: author not in authors')
        END;
    END;
CREATE TRIGGER fkc_update_books_languages_link_a
    BEFORE UPDATE OF book ON books_languages_link
    BEGIN
        SELECT CASE
            WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
            THEN RAISE(ABORT, 'Foreign key violation: book not in books')
        END;
    END;
CREATE TRIGGER fkc_update_books_languages_link_b
    BEFORE UPDATE OF lang_code ON books_languages_link
    BEGIN
        SELECT CASE
            WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
            THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
        END;
    END;
CREATE TRIGGER fkc_update_books_publishers_link_a
    BEFORE UPDATE OF book ON books_publishers_link
    BEGIN
@@ -341,3 +543,4 @@ CREATE TRIGGER series_update_trg
    BEGIN
        UPDATE series SET sort=NEW.name WHERE id=NEW.id;
    END;
pragma user_version=20;
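The schema above relies on SQL functions that SQLite itself does not provide (title_sort(), uuid4(), concat(), sortconcat(), books_list_filter()), so it can only be loaded by an application that registers them first. A minimal, self-contained sketch of how the books_insert_trg trigger works once such functions are registered; the two stand-in implementations here are toys, not calibre's real ones:

    import sqlite3, uuid

    con = sqlite3.connect(':memory:')
    # Toy stand-ins for calibre's application-defined SQL functions:
    con.create_function('title_sort', 1, lambda title: title)
    con.create_function('uuid4', 0, lambda: str(uuid.uuid4()))

    con.execute("CREATE TABLE books (id INTEGER PRIMARY KEY AUTOINCREMENT, "
                "title TEXT NOT NULL DEFAULT 'Unknown', sort TEXT, uuid TEXT)")
    con.execute("CREATE TRIGGER books_insert_trg AFTER INSERT ON books "
                "BEGIN "
                "UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() "
                "WHERE id=NEW.id; "
                "END")
    con.execute("INSERT INTO books(title) VALUES ('The Mark of Ran')")
    # The trigger has filled in sort and uuid automatically:
    print con.execute('SELECT title, sort, uuid FROM books').fetchone()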
@@ -5,6 +5,7 @@
"strcat": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n    i = 0\n    res = ''\n    for i in range(0, len(args)):\n        res += args[i]\n    return res\n",
"substr": "def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):\n    return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]\n",
"ifempty": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n    if val:\n        return val\n    else:\n        return value_if_empty\n",
"booksize": "def evaluate(self, formatter, kwargs, mi, locals):\n    if mi.book_size is not None:\n        try:\n            return str(mi.book_size)\n        except:\n            pass\n    return ''\n",
"select": "def evaluate(self, formatter, kwargs, mi, locals, val, key):\n    if not val:\n        return ''\n    vals = [v.strip() for v in val.split(',')]\n    for v in vals:\n        if v.startswith(key+':'):\n            return v[len(key)+1:]\n    return ''\n",
"field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n    return formatter.get_value(name, [], kwargs)\n",
"subtract": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n    x = float(x if x else 0)\n    y = float(y if y else 0)\n    return unicode(x - y)\n",
@@ -25,9 +26,9 @@
"capitalize": "def evaluate(self, formatter, kwargs, mi, locals, val):\n    return capitalize(val)\n",
"count": "def evaluate(self, formatter, kwargs, mi, locals, val, sep):\n    return unicode(len(val.split(sep)))\n",
"lowercase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n    return val.lower()\n",
"assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n    locals[target] = value\n    return value\n",
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n    if (len(args) % 2) != 1:\n        raise ValueError(_('switch requires an odd number of arguments'))\n    i = 0\n    while i < len(args):\n        if i + 1 >= len(args):\n            return args[i]\n        if re.search(args[i], val):\n            return args[i+1]\n        i += 2\n",
"strcmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n    v = strcmp(x, y)\n    if v < 0:\n        return lt\n    if v == 0:\n        return eq\n    return gt\n",
"switch": "def evaluate(self, formatter, kwargs, mi, locals, val, *args):\n    if (len(args) % 2) != 1:\n        raise ValueError(_('switch requires an odd number of arguments'))\n    i = 0\n    while i < len(args):\n        if i + 1 >= len(args):\n            return args[i]\n        if re.search(args[i], val):\n            return args[i+1]\n        i += 2\n",
"assign": "def evaluate(self, formatter, kwargs, mi, locals, target, value):\n    locals[target] = value\n    return value\n",
"raw_field": "def evaluate(self, formatter, kwargs, mi, locals, name):\n    return unicode(getattr(mi, name, None))\n",
"cmp": "def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):\n    x = float(x if x else 0)\n    y = float(y if y else 0)\n    if x < y:\n        return lt\n    if x == y:\n        return eq\n    return gt\n"
}
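Each value in the JSON above is the Python source of an evaluate() method, stored as a string. A rough sketch of how such a stored snippet can be executed into a callable; the bare namespace and the None placeholders are stand-ins, not calibre's actual template-engine plumbing:

    # Take the stored source of 'ifempty' and execute it into a namespace.
    src = ("def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):\n"
           "    if val:\n"
           "        return val\n"
           "    else:\n"
           "        return value_if_empty\n")
    namespace = {}
    exec src in namespace          # Python 2, matching the code base
    ifempty = namespace['evaluate']

    # This particular function ignores self/formatter/kwargs/mi/locals:
    print ifempty(None, None, {}, None, {}, '', 'fallback')       # -> fallback
    print ifempty(None, None, {}, None, {}, 'value', 'fallback')  # -> value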
resources/templates/book_details.css (new file, 46 lines)
@@ -0,0 +1,46 @@
a {
    text-decoration: none;
    color: blue
}

a:hover {
    color: red
}

.comments {
    margin-top: 0;
    padding-top: 0;
    text-indent: 0
}

table.fields {
    margin-bottom: 0;
    padding-bottom: 0;
}

table.fields td {
    vertical-align: top
}

table.fields td.title {
    font-weight: bold
}

.series_name {
    font-style: italic
}

/*
The HTML that this stylesheet applies to looks like this:

<table class="fields">
    <tr id="formats" class="datatype_text"><td class="title">Formats:</td><td><a href="format:572:EPUB">EPUB</a>, <a href="format:572:LIT">LIT</a></td></tr>
    <tr id="series" class="datatype_series"><td class="title">Series:</td><td>Book II of <span class="series_name">The Sea Beggars</span></td></tr>
    <tr id="tags" class="datatype_text"><td class="title">Tags:</td><td>Fantasy, Fiction</td></tr>
    <tr id="path" class="datatype_text"><td class="title">Path:</td><td><a href="path:572" title="/home/kovid/test library/Paul Kearney/This Forsaken Earth (572)">Click to open</a></td></tr>
</table>

<div id="comments" class="comments"><h3>From Publishers Weekly</h3><p>At the start of Kearney's rousing sequel to <em>The Mark of Ran</em> (2005), Rol Cortishane, the youthful captain of the privateer <em>Revenant</em>, captures a slaver and frees its chained slaves. Back in the harbor of Ganesh Ka in the land of Umer, Rol encounters an untrustworthy acquaintance he hasn't seen in years, Canker, a former king of thieves, who urges Rol to join in the fight to save Rowen, a darkly beautiful queen, whose throne is at risk in mountainous Bionar. That Rowen is Rol's half-sister for whom he has lusted in the past doesn't make Rol's decision to help an easy one. If as in <em>The Mark of Ran</em> the action is more lively at sea than on land, Kearney's solid storytelling and nautical detail worthy of C.S. Forester or Patrick O'Brian will keep readers turning the pages. <em>(Dec.)</em> <br />Copyright © Reed Business Information, a division of Reed Elsevier Inc. All rights reserved. </p><h3>From</h3><p>The sequel to <em>The Mark of Ran</em> (2005) finds heroic young Rol Cortishane grown to be a much-feared sea captain. Deciding to ignore his mysterious past, he spends his energy on ship and crew. He is still an outlaw, however, and the only port he can call home is Ganesh Ka, the endangered city of exiles. When word comes from Rowan, his half-sister, asking him to fight on her behalf, he must weigh the safety of Ganesh Ka against Rowan's treachery in the past. Finally persuaded to aid Rowan, he learns more of betrayal and his heritage in the ensuing battles than he had wanted to know. Kearney's characters are much better developed here than they were in <em>The Mark of Ran</em>, and since the book tells a single story, the plot is tighter. Moreover, because almost all the action transpires in the here and now, the sequel can be read without reference to the predecessor. Since it ends hanging on a particularly bloody cliff, expect to see more of Kearney's excellent maritime fantasy. <em>Frieda Murray</em><br /><em>Copyright © American Library Association. All rights reserved</em></p>
</div>
*/
setup.py (4 changed lines)
@@ -15,9 +15,9 @@ from setup import prints, get_warnings

def check_version_info():
    vi = sys.version_info
    if vi[0] == 2 and vi[1] > 5:
    if vi[0] == 2 and vi[1] > 6:
        return None
    return 'calibre requires python >= 2.6'
    return 'calibre requires python >= 2.7 and < 3'

def option_parser():
    parser = optparse.OptionParser()
@@ -24,8 +24,10 @@ def initialize_constants():
    global __version__, __appname__, modules, functions, basenames, scripts

    src = open('src/calibre/constants.py', 'rb').read()
    __version__ = re.search(r'__version__\s+=\s+[\'"]([^\'"]+)[\'"]', src).group(1)
    __appname__ = re.search(r'__appname__\s+=\s+[\'"]([^\'"]+)[\'"]', src).group(1)
    nv = re.search(r'numeric_version\s+=\s+\((\d+), (\d+), (\d+)\)', src)
    __version__ = '%s.%s.%s'%(nv.group(1), nv.group(2), nv.group(3))
    __appname__ = re.search(r'__appname__\s+=\s+(u{0,1})[\'"]([^\'"]+)[\'"]',
            src).group(2)
    epsrc = re.compile(r'entry_points = (\{.*?\})', re.DOTALL).\
            search(open('src/calibre/linux.py', 'rb').read()).group(1)
    entry_points = eval(epsrc, {'__appname__': __appname__})
@@ -13,7 +13,8 @@ from setup import Command, modules, functions, basenames, __version__, \
from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn

QT_DIR = 'Q:\\Qt\\4.7.1'
OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.7.2'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
@@ -108,6 +109,8 @@ class Win32Freeze(Command, WixMixIn):
        self.dll_dir = self.j(self.base, 'DLLs')
        shutil.copytree(r'C:\Python%s\DLLs'%self.py_ver, self.dll_dir,
                ignore=shutil.ignore_patterns('msvc*.dll', 'Microsoft.*'))
        for x in glob.glob(self.j(OPENSSL_DIR, 'bin', '*.dll')):
            shutil.copy2(x, self.dll_dir)
        for x in QT_DLLS:
            x += '4.dll'
            if not x.startswith('phonon'): x = 'Qt'+x
@@ -53,12 +53,42 @@ SQLite

Put sqlite3*.h from the SQLite windows amalgamation in ~/sw/include

OpenSSL
--------

First install ActiveState Perl if you don't already have Perl on Windows.
Download and untar the OpenSSL tarball, and follow the instructions in INSTALL.W32 (use no-asm).
To install, use the prefix Q:\openssl::

    perl Configure VC-WIN32 no-asm enable-static-engine --prefix=Q:/openssl
    ms\do_ms.bat
    nmake -f ms\ntdll.mak
    nmake -f ms\ntdll.mak test
    nmake -f ms\ntdll.mak install

Qt
--------

Extract Qt sourcecode to C:\Qt\4.x.x. Run configure and make::
Extract Qt sourcecode to C:\Qt\4.x.x.

    configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs && nmake

Qt uses its own routine to locate and load "system libraries", including the OpenSSL libraries needed for "Get Books". This means that we have to apply the following patch to have Qt load the OpenSSL libraries bundled with calibre::

    --- src/corelib/plugin/qsystemlibrary.cpp	2011-02-22 05:04:00.000000000 -0700
    +++ src/corelib/plugin/qsystemlibrary.cpp	2011-04-25 20:53:13.635247466 -0600
    @@ -110,7 +110,7 @@ HINSTANCE QSystemLibrary::load(const wch

     #if !defined(QT_BOOTSTRAPPED)
         if (!onlySystemDirectory)
    -        searchOrder << QFileInfo(qAppFileName()).path();
    +        searchOrder << (QFileInfo(qAppFileName()).path().replace(QLatin1Char('/'), QLatin1Char('\\')) + QString::fromLatin1("\\DLLs\\"));
     #endif
         searchOrder << qSystemDirectory();

Now, run configure and make::

    configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake

SIP
-----
@@ -11,7 +11,10 @@
        SummaryCodepage='1252' />

    <Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />

    <!-- The following line is needed because of the patch to QtCore4.dll. You can remove it
         after you update Qt beyond 4.7.2. 'emus' means re-install even if the version is the
         same, not just if it is older. -->
    <Property Id='REINSTALLMODE' Value='emus'/>

    <Upgrade Id="{upgrade_code}">
        <UpgradeVersion Maximum="{version}"
            IncludeMaximum="yes"
@@ -347,9 +347,10 @@ class UploadUserManual(Command): # {{{
        with NamedTemporaryFile(suffix='.zip') as f:
            os.fchmod(f.fileno(),
                stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
            with CurrentDir(self.d(path)):
            with CurrentDir(path):
                with ZipFile(f, 'w') as zf:
                    for x in os.listdir('.'):
                        if x.endswith('.swp'): continue
                        zf.write(x)
                        if os.path.isdir(x):
                            for y in os.listdir(x):
@@ -3,11 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import uuid, sys, os, re, logging, time, random, \
        __builtin__, warnings, multiprocessing
from contextlib import closing
from urllib import getproxies
from urllib2 import unquote as urllib2_unquote
import sys, os, re, time, random, __builtin__, warnings
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint
from math import floor
@@ -16,25 +12,51 @@ from functools import partial
warnings.simplefilter('ignore', DeprecationWarning)


from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
        terminal_controller, preferred_encoding, \
        __appname__, __version__, __author__, \
        win32event, win32api, winerror, fcntl, \
        filesystem_encoding, plugins, config_dir
from calibre.startup import winutil, winutilerror, guess_type
from calibre.constants import (iswindows, isosx, islinux, isfreebsd, isfrozen,
        preferred_encoding, __appname__, __version__, __author__,
        win32event, win32api, winerror, fcntl,
        filesystem_encoding, plugins, config_dir)
from calibre.startup import winutil, winutilerror

if islinux and not getattr(sys, 'frozen', False):
    # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
if False and islinux and not getattr(sys, 'frozen', False):
    # Imported before PyQt4 to workaround PyQt4 util-linux conflict discovered on gentoo
    # See http://bugs.gentoo.org/show_bug.cgi?id=317557
    # Importing uuid is slow so get rid of this at some point, maybe in a few
    # years when even Debian has caught up
    # Also remember to remove it from site.py in the binary builds
    import uuid
    uuid.uuid4()

if False:
    # Prevent pyflakes from complaining
    winutil, winutilerror, __appname__, islinux, __version__
    fcntl, win32event, isfrozen, __author__, terminal_controller
    winerror, win32api, isfreebsd, guess_type
    fcntl, win32event, isfrozen, __author__
    winerror, win32api, isfreebsd

import cssutils
cssutils.log.setLevel(logging.WARN)
_mt_inited = False
def _init_mimetypes():
    global _mt_inited
    import mimetypes
    mimetypes.init([P('mime.types')])
    _mt_inited = True

def guess_type(*args, **kwargs):
    import mimetypes
    if not _mt_inited:
        _init_mimetypes()
    return mimetypes.guess_type(*args, **kwargs)

def guess_all_extensions(*args, **kwargs):
    import mimetypes
    if not _mt_inited:
        _init_mimetypes()
    return mimetypes.guess_all_extensions(*args, **kwargs)

def get_types_map():
    import mimetypes
    if not _mt_inited:
        _init_mimetypes()
    return mimetypes.types_map
||||
def to_unicode(raw, encoding='utf-8', errors='strict'):
|
||||
if isinstance(raw, unicode):
|
||||
@ -182,6 +204,7 @@ class CommandLineError(Exception):
|
||||
pass
|
||||
|
||||
def setup_cli_handlers(logger, level):
|
||||
import logging
|
||||
if os.environ.get('CALIBRE_WORKER', None) is not None and logger.handlers:
|
||||
return
|
||||
logger.setLevel(level)
|
||||
@ -243,6 +266,7 @@ def extract(path, dir):
|
||||
extractor(path, dir)
|
||||
|
||||
def get_proxies(debug=True):
|
||||
from urllib import getproxies
|
||||
proxies = getproxies()
|
||||
for key, proxy in list(proxies.items()):
|
||||
if not proxy or '..' in proxy:
|
||||
@ -323,7 +347,7 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
|
||||
opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
|
||||
opener.set_handle_robots(False)
|
||||
if user_agent is None:
|
||||
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
|
||||
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
|
||||
opener.addheaders = [('User-agent', user_agent)]
|
||||
http_proxy = get_proxies().get('http', None)
|
||||
if http_proxy:
|
||||
@ -364,7 +388,11 @@ class CurrentDir(object):
|
||||
return self.cwd
|
||||
|
||||
def __exit__(self, *args):
|
||||
os.chdir(self.cwd)
|
||||
try:
|
||||
os.chdir(self.cwd)
|
||||
except:
|
||||
# The previous CWD no longer exists
|
||||
pass
|
||||
|
||||
|
||||
class StreamReadWrapper(object):
|
||||
@ -386,6 +414,7 @@ class StreamReadWrapper(object):
|
||||
|
||||
def detect_ncpus():
|
||||
"""Detects the number of effective CPUs in the system"""
|
||||
import multiprocessing
|
||||
ans = -1
|
||||
try:
|
||||
ans = multiprocessing.cpu_count()
|
||||
@ -547,8 +576,14 @@ def url_slash_cleaner(url):
|
||||
return re.sub(r'(?<!:)/{2,}', '/', url)
|
||||
|
||||
def get_download_filename(url, cookie_file=None):
|
||||
'''
|
||||
Get a local filename for a URL using the content disposition header
|
||||
'''
|
||||
from contextlib import closing
|
||||
from urllib2 import unquote as urllib2_unquote
|
||||
|
||||
filename = ''
|
||||
|
||||
|
||||
br = browser()
|
||||
if cookie_file:
|
||||
from mechanize import MozillaCookieJar
|
||||
@ -676,4 +711,3 @@ main()
|
||||
ipshell()
|
||||
sys.argv = old_argv
|
||||
|
||||
|
||||
|
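The guess_type family above replaces module-level use of the stdlib mimetypes module with wrappers that parse calibre's own mime.types at most once, on first call. A minimal sketch of the intended call pattern (hypothetical call sites, not part of this diff)::

    from calibre import guess_type, guess_all_extensions

    guess_type('cover.jpg')             # first call runs _init_mimetypes()
    guess_all_extensions('image/jpeg')  # later calls reuse the parsed map
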
@@ -1,28 +1,32 @@
from future_builtins import map

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.54'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re, importlib
_ver = __version__.split('.')
_ver = [int(re.search(r'(\d+)', x).group(1)) for x in _ver]
numeric_version = tuple(_ver)
__appname__ = u'calibre'
numeric_version = (0, 7, 57)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

'''
Various run time constants.
'''

import sys, locale, codecs, os
from calibre.utils.terminfo import TerminalController
import sys, locale, codecs, os, importlib, collections

terminal_controller = TerminalController(sys.stdout)
_tc = None
def terminal_controller():
global _tc
if _tc is None:
from calibre.utils.terminfo import TerminalController
_tc = TerminalController(sys.stdout)
return _tc

iswindows = 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower()
isosx = 'darwin' in sys.platform.lower()
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in sys.platform.lower()
_plat = sys.platform.lower()
iswindows = 'win32' in _plat or 'win64' in _plat
isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat
islinux = not(iswindows or isosx or isfreebsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux
@@ -41,6 +45,7 @@ fcntl = None if iswindows else importlib.import_module('fcntl')
filesystem_encoding = sys.getfilesystemencoding()
if filesystem_encoding is None: filesystem_encoding = 'utf-8'


DEBUG = False

def debug():
@@ -48,15 +53,12 @@ def debug():
DEBUG = True

# plugins {{{
plugins = None
if plugins is None:
# Load plugins
def load_plugins():
plugins = {}
plugin_path = sys.extensions_location
sys.path.insert(0, plugin_path)

for plugin in [
class Plugins(collections.Mapping):

def __init__(self):
self._plugins = {}
plugins = [
'pictureflow',
'lzx',
'msdes',
@@ -70,19 +72,44 @@ if plugins is None:
'chm_extra',
'icu',
'speedup',
] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
p, err = importlib.import_module(plugin), ''
except Exception as err:
p = None
err = str(err)
plugins[plugin] = (p, err)
sys.path.remove(plugin_path)
return plugins
]
if iswindows:
plugins.append('winutil')
if isosx:
plugins.append('usbobserver')
self.plugins = frozenset(plugins)

plugins = load_plugins()
def load_plugin(self, name):
if name in self._plugins:
return
sys.path.insert(0, sys.extensions_location)
try:
p, err = importlib.import_module(name), ''
except Exception as err:
p = None
err = str(err)
self._plugins[name] = (p, err)
sys.path.remove(sys.extensions_location)

def __iter__(self):
return iter(self.plugins)

def __len__(self):
return len(self.plugins)

def __contains__(self, name):
return name in self.plugins

def __getitem__(self, name):
if name not in self.plugins:
raise KeyError('No plugin named %r'%name)
self.load_plugin(name)
return self._plugins[name]


plugins = None
if plugins is None:
plugins = Plugins()
# }}}

# config_dir {{{

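Note that plugins is now a lazy Mapping rather than a dict populated at import time: each compiled extension is imported only when first subscripted, which is part of what makes launching worker processes on demand cheap. How calling code sees it, assuming the (module, error) tuple convention from the diff::

    from calibre.constants import plugins

    mod, err = plugins['lzx']  # the import happens here, on first access
    if mod is None:
        raise RuntimeError('Failed to load lzx: ' + err)
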
@@ -603,40 +603,34 @@ class PreferencesPlugin(Plugin): # {{{
# }}}

class StoreBase(Plugin): # {{{


supported_platforms = ['windows', 'osx', 'linux']
author = 'John Schember'
type = _('Store')
# This needs to be changed to (0, 8, 0)
minimum_calibre_version = (0, 4, 118)

actual_plugin = None
actual_plugin_object = None

def load_actual_plugin(self, gui):
'''
This method must return the actual interface action plugin object.
'''
mod, cls = self.actual_plugin.split(':')
self.actual_plugin_object = getattr(__import__(mod, fromlist=['1'], level=0), cls)(gui, self.name)
self.actual_plugin_object = getattr(importlib.import_module(mod), cls)(gui, self.name)
return self.actual_plugin_object

def customization_help(self, gui=False):
if self.actual_plugin_object:
if getattr(self, 'actual_plugin_object', None) is not None:
return self.actual_plugin_object.customization_help(gui)
else:
raise NotImplementedError()

raise NotImplementedError()

def config_widget(self):
if self.actual_plugin_object:
if getattr(self, 'actual_plugin_object', None) is not None:
return self.actual_plugin_object.config_widget()
else:
raise NotImplementedError()
raise NotImplementedError()

def save_settings(self, config_widget):
if self.actual_plugin_object:
if getattr(self, 'actual_plugin_object', None) is not None:
return self.actual_plugin_object.save_settings(config_widget)
else:
raise NotImplementedError()
raise NotImplementedError()

# }}}

@@ -9,7 +9,6 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.utils.config import test_eight_code

# To archive plugins {{{
@@ -98,6 +97,8 @@ class TXT2TXTZ(FileTypePlugin):
on_import = True

def _get_image_references(self, txt, base_dir):
from calibre.ebooks.oeb.base import OEB_IMAGES

images = []

# Textile
@@ -625,8 +626,10 @@ if test_eight_code:
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
from calibre.ebooks.metadata.sources.isbndb import ISBNDB
from calibre.ebooks.metadata.sources.overdrive import OverDrive

plugins += [GoogleBooks, Amazon, OpenLibrary]
plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive]

# }}}
else:
@@ -852,7 +855,7 @@ class ActionTweakEpub(InterfaceActionBase):
class ActionNextMatch(InterfaceActionBase):
name = 'Next Match'
actual_plugin = 'calibre.gui2.actions.next_match:NextMatchAction'


class ActionStore(InterfaceActionBase):
name = 'Store'
author = 'John Schember'
@@ -864,7 +867,10 @@ plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
ActionRestart, ActionOpenFolder, ActionConnectShare,
ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore]
ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch]

if test_eight_code:
plugins += [ActionStore]

# }}}

@@ -1097,20 +1103,12 @@ if test_eight_code:

#}}}

# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon

plugins += [GoogleBooks, Amazon]

# }}}

# Store plugins {{{
class StoreAmazonKindleStore(StoreBase):
name = 'Amazon Kindle'
description = _('Kindle books from Amazon')
actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'


class StoreBaenWebScriptionStore(StoreBase):
name = 'Baen WebScription'
description = _('Ebooks for readers.')
@@ -1141,11 +1139,11 @@ class StoreEHarlequinStoretore(StoreBase):
description = _('entertain, enrich, inspire.')
actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'

class StoreFeedbooksStore(StoreBase):
class StoreFeedbooksStore(StoreBase):
name = 'Feedbooks'
description = _('Read anywhere.')
actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'


class StoreGutenbergStore(StoreBase):
name = 'Project Gutenberg'
description = _('The first producer of free ebooks.')
@@ -1163,8 +1161,8 @@ class StoreManyBooksStore(StoreBase):

class StoreMobileReadStore(StoreBase):
name = 'MobileRead'
description = _('Handcrafted with utmost care ;)')
actual_plugin = 'calibre.gui2.store.mobileread_plugin:MobileReadStore'
description = _('Ebooks handcrafted with the utmost care')
actual_plugin = 'calibre.gui2.store.mobileread.mobileread_plugin:MobileReadStore'

class StoreOpenLibraryStore(StoreBase):
name = 'Open Library'

@@ -344,6 +344,7 @@ class iPadOutput(OutputProfile):
border-spacing:1px;
margin-left: 5%;
margin-right: 5%;
page-break-inside:avoid;
width: 90%;
-webkit-border-radius:4px;
}

@@ -22,6 +22,11 @@ from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
from calibre.ebooks.epub.fix import ePubFixer
from calibre.ebooks.metadata.sources.base import Source

builtin_names = frozenset([p.name for p in builtin_plugins])

class NameConflict(ValueError):
pass

def _config():
c = Config('customize')
c.add_opt('plugins', default={}, help=_('Installed plugins'))
@@ -355,6 +360,9 @@ def set_file_type_metadata(stream, mi, ftype):
def add_plugin(path_to_zip_file):
make_config_dir()
plugin = load_plugin(path_to_zip_file)
if plugin.name in builtin_names:
raise NameConflict(
'A builtin plugin with the name %r already exists' % plugin.name)
plugin = initialize_plugin(plugin, path_to_zip_file)
plugins = config['plugins']
zfp = os.path.join(plugin_dir, plugin.name+'.zip')
@@ -506,7 +514,11 @@ def initialize_plugin(plugin, path_to_zip_file):
def initialize_plugins():
global _initialized_plugins
_initialized_plugins = []
for zfp in list(config['plugins'].values()) + builtin_plugins:
conflicts = [name for name in config['plugins'] if name in
builtin_names]
for p in conflicts:
remove_plugin(p)
for zfp in list(config['plugins'].itervalues()) + builtin_plugins:
try:
try:
plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp

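The builtin_names check means a user-installed plugin can no longer silently shadow a builtin, and initialize_plugins() removes any such shadowing plugin left over in the config. Roughly, from the caller's side (zip path hypothetical; import path assumed to be calibre.customize.ui)::

    from calibre.customize.ui import add_plugin, NameConflict

    try:
        add_plugin('/tmp/MyStore.zip')  # name collides with a builtin plugin
    except NameConflict as e:
        print e
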
@@ -51,6 +51,8 @@ Run an embedded python interpreter.
'with sqlite3 works.')
parser.add_option('-p', '--py-console', help='Run python console',
default=False, action='store_true')
parser.add_option('-m', '--inspect-mobi',
help='Inspect the MOBI file at the specified path', default=None)

return parser

@@ -104,7 +106,7 @@ def migrate(old, new):
from calibre.library.database import LibraryDatabase
from calibre.library.database2 import LibraryDatabase2
from calibre.utils.terminfo import ProgressBar
from calibre import terminal_controller
from calibre.constants import terminal_controller
class Dummy(ProgressBar):
def setLabelText(self, x): pass
def setAutoReset(self, y): pass
@@ -117,7 +119,7 @@ def migrate(old, new):

db = LibraryDatabase(old)
db2 = LibraryDatabase2(new)
db2.migrate_old(db, Dummy(terminal_controller, 'Migrating database...'))
db2.migrate_old(db, Dummy(terminal_controller(), 'Migrating database...'))
prefs['library_path'] = os.path.abspath(new)
print 'Database migrated to', os.path.abspath(new)

@@ -227,6 +229,9 @@ def main(args=sys.argv):
if len(args) > 1 and os.access(args[-1], os.R_OK):
sql_dump = args[-1]
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
elif opts.inspect_mobi is not None:
from calibre.ebooks.mobi.debug import inspect_mobi
inspect_mobi(opts.inspect_mobi)
else:
from calibre import ipython
ipython()

@@ -26,6 +26,7 @@ class ANDROID(USBMS):
0xc92 : [0x100],
0xc97 : [0x226],
0xc99 : [0x0100],
0xca2 : [0x226],
0xca3 : [0x100],
0xca4 : [0x226],
},
@@ -54,6 +55,9 @@ class ANDROID(USBMS):
0x6877 : [0x0400],
},

# Viewsonic
0x0489 : { 0xc001 : [0x0226], 0xc004 : [0x0226], },

# Acer
0x502 : { 0x3203 : [0x0100]},

@@ -105,10 +109,10 @@ class ANDROID(USBMS):
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
'MB860', 'MULTI-CARD']
'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7']
'A70S', 'A101IT', '7', 'INCREDIBLE']

OSX_MAIN_MEM = 'Android Device Main Memory'

@@ -201,8 +201,9 @@ class ITUNES(DriverBase):
# 0x1294 iPhone 3GS
# 0x1297 iPhone 4
# 0x129a iPad
# 0x12a2 iPad2
VENDOR_ID = [0x05ac]
PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a]
PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x12a2]
BCD = [0x01]

# Plugboard ID
@@ -349,7 +350,7 @@ class ITUNES(DriverBase):
break
break
if self.report_progress is not None:
self.report_progress(j+1/task_count, _('Updating device metadata listing...'))
self.report_progress((j+1)/task_count, _('Updating device metadata listing...'))

if self.report_progress is not None:
self.report_progress(1.0, _('Updating device metadata listing...'))
@@ -421,14 +422,14 @@ class ITUNES(DriverBase):

cached_books[this_book.path] = {
'title':book.name(),
'author':[book.artist()],
'author':book.artist().split(' & '),
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
'dev_book':book,
'uuid': book.composer()
}

if self.report_progress is not None:
self.report_progress(i+1/book_count, _('%d of %d') % (i+1, book_count))
self.report_progress((i+1)/book_count, _('%d of %d') % (i+1, book_count))
self._purge_orphans(library_books, cached_books)

elif iswindows:
@@ -459,14 +460,14 @@ class ITUNES(DriverBase):

cached_books[this_book.path] = {
'title':book.Name,
'author':book.Artist,
'author':book.Artist.split(' & '),
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
'uuid': book.Composer,
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
}

if self.report_progress is not None:
self.report_progress(i+1/book_count,
self.report_progress((i+1)/book_count,
_('%d of %d') % (i+1, book_count))
self._purge_orphans(library_books, cached_books)

@@ -916,6 +917,8 @@ class ITUNES(DriverBase):
"""
if DEBUG:
self.log.info("ITUNES.reset()")
if report_progress:
self.set_progress_reporter(report_progress)

def set_progress_reporter(self, report_progress):
'''
@@ -924,6 +927,9 @@ class ITUNES(DriverBase):
If it is called with -1 that means that the
task does not have any progress information
'''
if DEBUG:
self.log.info("ITUNES.set_progress_reporter()")

self.report_progress = report_progress

def set_plugboards(self, plugboards, pb_func):
@@ -1016,7 +1022,9 @@ class ITUNES(DriverBase):
if isosx:
for (i,file) in enumerate(files):
format = file.rpartition('.')[2].lower()
path = self.path_template % (metadata[i].title, metadata[i].author[0],format)
path = self.path_template % (metadata[i].title,
authors_to_string(metadata[i].authors),
format)
self._remove_existing_copy(path, metadata[i])
fpath = self._get_fpath(file, metadata[i], format, update_md=True)
db_added, lb_added = self._add_new_copy(fpath, metadata[i])
@@ -1029,9 +1037,11 @@ class ITUNES(DriverBase):
if DEBUG:
self.log.info("ITUNES.upload_books()")
self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" %
( metadata[i].title, metadata[i].author, metadata[i].uuid))
(metadata[i].title,
authors_to_string(metadata[i].authors),
metadata[i].uuid))
self.cached_books[this_book.path] = {
'author': metadata[i].author,
'author': authors_to_string(metadata[i].authors),
'dev_book': db_added,
'format': format,
'lib_book': lb_added,
@@ -1041,7 +1051,7 @@ class ITUNES(DriverBase):

# Report progress
if self.report_progress is not None:
self.report_progress(i+1/file_count, _('%d of %d') % (i+1, file_count))
self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))

elif iswindows:
try:
@@ -1050,7 +1060,9 @@ class ITUNES(DriverBase):

for (i,file) in enumerate(files):
format = file.rpartition('.')[2].lower()
path = self.path_template % (metadata[i].title, metadata[i].author[0],format)
path = self.path_template % (metadata[i].title,
authors_to_string(metadata[i].authors),
format)
self._remove_existing_copy(path, metadata[i])
fpath = self._get_fpath(file, metadata[i],format, update_md=True)
db_added, lb_added = self._add_new_copy(fpath, metadata[i])
@@ -1070,9 +1082,11 @@ class ITUNES(DriverBase):
if DEBUG:
self.log.info("ITUNES.upload_books()")
self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" %
( metadata[i].title, metadata[i].author, metadata[i].uuid))
(metadata[i].title,
authors_to_string(metadata[i].authors),
metadata[i].uuid))
self.cached_books[this_book.path] = {
'author': metadata[i].author[0],
'author': authors_to_string(metadata[i].authors),
'dev_book': db_added,
'format': format,
'lib_book': lb_added,
@@ -1081,7 +1095,7 @@ class ITUNES(DriverBase):

# Report progress
if self.report_progress is not None:
self.report_progress(i+1/file_count, _('%d of %d') % (i+1, file_count))
self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))
finally:
pythoncom.CoUninitialize()

@@ -1185,7 +1199,7 @@ class ITUNES(DriverBase):
base_fn = base_fn.rpartition('.')[0]
db_added = self._find_device_book(
{ 'title': base_fn if format == 'pdf' else metadata.title,
'author': metadata.authors[0],
'author': authors_to_string(metadata.authors),
'uuid': metadata.uuid,
'format': format})
return db_added
@@ -1250,7 +1264,7 @@ class ITUNES(DriverBase):
base_fn = base_fn.rpartition('.')[0]
added = self._find_library_book(
{ 'title': base_fn if format == 'pdf' else metadata.title,
'author': metadata.author[0],
'author': authors_to_string(metadata.authors),
'uuid': metadata.uuid,
'format': format})
return added
@@ -1309,7 +1323,7 @@ class ITUNES(DriverBase):
with open(metadata.cover,'r+b') as cd:
cover_data = cd.read()
except:
self.problem_titles.append("'%s' by %s" % (metadata.title, metadata.author[0]))
self.problem_titles.append("'%s' by %s" % (metadata.title, authors_to_string(metadata.authors)))
self.log.error(" error scaling '%s' for '%s'" % (metadata.cover,metadata.title))

import traceback
@@ -1384,7 +1398,7 @@ class ITUNES(DriverBase):
thumb_path = path.rpartition('.')[0] + '.jpg'
zfw.writestr(thumb_path, thumb)
except:
self.problem_titles.append("'%s' by %s" % (metadata.title, metadata.author[0]))
self.problem_titles.append("'%s' by %s" % (metadata.title, authors_to_string(metadata.authors)))
self.log.error(" error converting '%s' to thumb for '%s'" % (metadata.cover,metadata.title))
finally:
try:
@@ -1402,7 +1416,7 @@ class ITUNES(DriverBase):
if DEBUG:
self.log.info(" ITUNES._create_new_book()")

this_book = Book(metadata.title, authors_to_string(metadata.author))
this_book = Book(metadata.title, authors_to_string(metadata.authors))
this_book.datetime = time.gmtime()
this_book.db_id = None
this_book.device_collections = []
@@ -2446,7 +2460,7 @@ class ITUNES(DriverBase):
for book in self.cached_books:
if self.cached_books[book]['uuid'] == metadata.uuid or \
(self.cached_books[book]['title'] == metadata.title and \
self.cached_books[book]['author'] == metadata.authors[0]):
self.cached_books[book]['author'] == authors_to_string(metadata.authors)):
self.update_list.append(self.cached_books[book])
self._remove_from_device(self.cached_books[book])
if DEBUG:
@@ -2465,7 +2479,7 @@ class ITUNES(DriverBase):
for book in self.cached_books:
if self.cached_books[book]['uuid'] == metadata.uuid or \
(self.cached_books[book]['title'] == metadata.title and \
self.cached_books[book]['author'] == metadata.authors[0]):
self.cached_books[book]['author'] == authors_to_string(metadata.authors)):
self.update_list.append(self.cached_books[book])
self._remove_from_iTunes(self.cached_books[book])
if DEBUG:
@@ -2934,13 +2948,13 @@ class ITUNES(DriverBase):
def _xform_metadata_via_plugboard(self, book, format):
''' Transform book metadata from plugboard templates '''
if DEBUG:
self.log.info(" ITUNES._xform_metadata_via_plugboard()")
self.log.info(" ITUNES._xform_metadata_via_plugboard()")

if self.plugboard_func:
pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards)
newmi = book.deepcopy_metadata()
newmi.template_to_attribute(book, pb)
if DEBUG:
if pb is not None and DEBUG:
self.log.info(" transforming %s using %s:" % (format, pb))
self.log.info(" title: %s %s" % (book.title, ">>> %s" %
newmi.title if book.title != newmi.title else ''))
@@ -3057,7 +3071,7 @@ class ITUNES_ASYNC(ITUNES):

cached_books[this_book.path] = {
'title':library_books[book].name(),
'author':[library_books[book].artist()],
'author':library_books[book].artist().split(' & '),
'lib_book':library_books[book],
'dev_book':None,
'uuid': library_books[book].composer(),
@@ -3065,7 +3079,7 @@ class ITUNES_ASYNC(ITUNES):
}

if self.report_progress is not None:
self.report_progress(i+1/book_count, _('%d of %d') % (i+1, book_count))
self.report_progress((i+1)/book_count, _('%d of %d') % (i+1, book_count))

elif iswindows:
try:
@@ -3097,14 +3111,14 @@ class ITUNES_ASYNC(ITUNES):

cached_books[this_book.path] = {
'title':library_books[book].Name,
'author':library_books[book].Artist,
'author':library_books[book].Artist.split(' & '),
'lib_book':library_books[book],
'uuid': library_books[book].Composer,
'format': format
}

if self.report_progress is not None:
self.report_progress(i+1/book_count,
self.report_progress((i+1)/book_count,
_('%d of %d') % (i+1, book_count))

finally:
@@ -3283,7 +3297,7 @@ class Book(Metadata):
See ebooks.metadata.book.base
'''
def __init__(self,title,author):
Metadata.__init__(self, title, authors=[author])
Metadata.__init__(self, title, authors=author.split(' & '))

@property
def title_sorter(self):

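Most of the report_progress changes above are the same one-character fix repeated: in Python 2, i+1/book_count parses as i + (1/book_count), so the call never received the intended fraction. Assuming the counts are floats, as the driver's usage suggests::

    book_count = 12.0
    i = 5
    i + 1 / book_count    # 5.083..., the bare loop index plus a sliver
    (i + 1) / book_count  # 0.5, the fraction report_progress expects
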
@@ -164,7 +164,7 @@ class APNXBuilder(object):
if c == '/':
closing = True
continue
elif c == 'p':
elif c in ('d', 'p'):
if closing:
in_p = False
else:

@@ -244,7 +244,7 @@ class EEEREADER(USBMS):
FORMATS = ['epub', 'fb2', 'txt', 'pdf']

VENDOR_ID = [0x0b05]
PRODUCT_ID = [0x178f]
PRODUCT_ID = [0x178f, 0x17a1]
BCD = [0x0319]

EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Book'

@@ -8,7 +8,7 @@ manner.
import sys, os, re
from threading import RLock

from calibre import iswindows, isosx, plugins, islinux
from calibre.constants import iswindows, isosx, plugins, islinux

osx_scanner = win_scanner = linux_scanner = None

@@ -203,6 +203,8 @@ class CollectionsBookList(BookList):
val = [orig_val]
elif fm['datatype'] == 'text' and fm['is_multiple']:
val = orig_val
elif fm['datatype'] == 'composite' and fm['is_multiple']:
val = [v.strip() for v in val.split(fm['is_multiple'])]
else:
val = [val]

@@ -7,7 +7,7 @@ Code for the conversion of ebook formats and the reading of metadata
from various formats.
'''

import traceback, os
import traceback, os, re
from calibre import CurrentDir

class ConversionError(Exception):
@@ -26,7 +26,7 @@ class ParserError(ValueError):
pass

BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']

@@ -169,3 +169,42 @@ def calibre_cover(title, author_string, series_string=None,
lines.append(TextLine(series_string, author_size))
return create_cover_page(lines, I('library.png'), output_format='jpg')

UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')

def unit_convert(value, base, font, dpi):
' Return value in pts'
if isinstance(value, (int, long, float)):
return value
try:
return float(value) * 72.0 / dpi
except:
pass
result = value
m = UNIT_RE.match(value)
if m is not None and m.group(1):
value = float(m.group(1))
unit = m.group(2)
if unit == '%':
result = (value / 100.0) * base
elif unit == 'px':
result = value * 72.0 / dpi
elif unit == 'in':
result = value * 72.0
elif unit == 'pt':
result = value
elif unit == 'em':
result = value * font
elif unit in ('ex', 'en'):
# This is a hack for ex since we have no way to know
# the x-height of the font
font = font
result = value * font * 0.5
elif unit == 'pc':
result = value * 12.0
elif unit == 'mm':
result = value * 0.04
elif unit == 'cm':
result = value * 0.40
return result

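unit_convert() normalizes a CSS length to points, falling back to treating a bare number as pixels at the given dpi. A few calls worked out from the branches above (illustrative only)::

    unit_convert('2em', base=0, font=12.0, dpi=166)     # 24.0 (2 x font size)
    unit_convert('50%', base=10.0, font=12.0, dpi=166)  # 5.0 (fraction of base)
    unit_convert('96px', base=0, font=12.0, dpi=96)     # 72.0 (px -> pt via dpi)
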
@@ -19,12 +19,12 @@ class CHMInput(InputFormatPlugin):
description = 'Convert CHM files to OEB'
file_types = set(['chm'])

def _chmtohtml(self, output_dir, chm_path, no_images, log):
def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
from calibre.ebooks.chm.reader import CHMReader
log.debug('Opening CHM file')
rdr = CHMReader(chm_path, log, self.opts)
log.debug('Extracting CHM to %s' % output_dir)
rdr.extract_content(output_dir)
rdr.extract_content(output_dir, debug_dump=debug_dump)
self._chm_reader = rdr
return rdr.hhc_path

@@ -47,12 +47,20 @@ class CHMInput(InputFormatPlugin):
stream.close()
log.debug('tdir=%s' % tdir)
log.debug('stream.name=%s' % stream.name)
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
debug_dump = False
odi = options.debug_pipeline
if odi:
debug_dump = os.path.join(odi, 'input')
mainname = self._chmtohtml(tdir, chm_name, no_images, log,
debug_dump=debug_dump)
mainpath = os.path.join(tdir, mainname)

metadata = get_metadata_from_reader(self._chm_reader)
self._chm_reader.CloseCHM()
#print tdir
#from calibre import ipython
#ipython()

odi = options.debug_pipeline
options.debug_pipeline = None
options.input_encoding = 'utf-8'
# try a custom conversion:

@@ -5,8 +5,8 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'

import os, re
from mimetypes import guess_type as guess_mimetype

from calibre import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
from calibre.constants import iswindows, filesystem_encoding
from calibre.utils.chm.chm import CHMFile
@@ -97,7 +97,7 @@ class CHMReader(CHMFile):
raise CHMError("'%s' is zero bytes in length!"%(path,))
return data

def ExtractFiles(self, output_dir=os.getcwdu()):
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
html_files = set([])
for path in self.Contents():
lpath = os.path.join(output_dir, path)
@@ -123,6 +123,9 @@ class CHMReader(CHMFile):
self.log.warn('%r filename too long, skipping'%path)
continue
raise
if debug_dump:
import shutil
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
for lpath in html_files:
with open(lpath, 'r+b') as f:
data = f.read()
@@ -147,7 +150,8 @@ class CHMReader(CHMFile):
if self.hhc_path == '.hhc' and self.hhc_path not in files:
from calibre import walk
for x in walk(output_dir):
if os.path.basename(x).lower() in ('index.htm', 'index.html'):
if os.path.basename(x).lower() in ('index.htm', 'index.html',
'contents.htm', 'contents.html'):
self.hhc_path = os.path.relpath(x, output_dir)
break

@@ -248,8 +252,8 @@ class CHMReader(CHMFile):
if not os.path.isdir(dir):
os.makedirs(dir)

def extract_content(self, output_dir=os.getcwdu()):
self.ExtractFiles(output_dir=output_dir)
def extract_content(self, output_dir=os.getcwdu(), debug_dump=False):
self.ExtractFiles(output_dir=output_dir, debug_dump=debug_dump)

@@ -12,6 +12,7 @@ from Queue import Empty

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import extract, CurrentDir, prints
from calibre.constants import filesystem_encoding
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.ipc.server import Server
from calibre.utils.ipc.job import ParallelJob
@@ -21,6 +22,10 @@ def extract_comic(path_to_comic_file):
Un-archive the comic file.
'''
tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
if not isinstance(tdir, unicode):
# Needed in case the zip file has wrongly encoded unicode file/dir
# names
tdir = tdir.decode(filesystem_encoding)
extract(path_to_comic_file, tdir)
return tdir

@@ -17,6 +17,7 @@
#define BUFFER 6000

#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) )
#define MAX(x, y) ( ((x) > (y)) ? (x) : (y) )

typedef unsigned short int Byte;
typedef struct {
@@ -53,7 +54,7 @@ cpalmdoc_decompress(PyObject *self, PyObject *args) {
// Map chars to bytes
for (j = 0; j < input_len; j++)
input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
output = (char *)PyMem_Malloc(sizeof(char)*BUFFER);
output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 5*input_len)));
if (output == NULL) return PyErr_NoMemory();

while (i < input_len) {

@@ -14,7 +14,8 @@ from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.date import parse_date
from calibre.utils.zipfile import ZipFile
from calibre import extract, walk, isbytestring, filesystem_encoding
from calibre import (extract, walk, isbytestring, filesystem_encoding,
get_types_map)
from calibre.constants import __version__

DEBUG_README=u'''
@@ -853,7 +854,8 @@ OptionRecommendation(name='sr3_replace',
if isinstance(ret, basestring):
shutil.copytree(output_dir, out_dir)
else:
os.makedirs(out_dir)
if not os.path.exists(out_dir):
os.makedirs(out_dir)
self.dump_oeb(ret, out_dir)
if self.input_fmt == 'recipe':
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
@@ -875,6 +877,9 @@ OptionRecommendation(name='sr3_replace',
if self.opts.verbose:
self.log.filter_level = self.log.DEBUG
self.flush()
import cssutils, logging
cssutils.log.setLevel(logging.WARN)
get_types_map() # Ensure the mimetypes module is intialized

if self.opts.debug_pipeline is not None:
self.opts.verbose = max(self.opts.verbose, 4)

@@ -399,10 +399,10 @@ class HTMLPreProcessor(object):
(re.compile(u'˙\s*(<br.*?>)*\s*Z', re.UNICODE), lambda match: u'Ż'),

# If pdf printed from a browser then the header/footer has a reliable pattern
(re.compile(r'((?<=</a>)\s*file:////?[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),
(re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),

# Center separator lines
(re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
(re.compile(u'<br>\s*(?P<break>([*#•✦=] *){3,})\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group('break') + '</p>'),

# Remove page links
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),

@@ -156,17 +156,17 @@ class HeuristicProcessor(object):
]

ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
r'(?msu)(?<=[\s>])/(?P<words>[^/\*>]+)/',
r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)/_',
r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)\*_',
r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)/\*',
r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)/\*_',
r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+):/',
r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+):\|',
ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*>]+)/',
ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
ur'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_',
ur'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_',
ur'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*',
ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_',
ur'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/',
ur'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|',
]

for word in ITALICIZE_WORDS:
@@ -518,13 +518,13 @@ class HeuristicProcessor(object):
if re.findall('(<|>)', replacement_break):
if re.match('^<hr', replacement_break):
if replacement_break.find('width') != -1:
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2
hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) / 2
hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>'
else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
elif re.match('^<img', replacement_break):
scene_break = self.scene_break_open+replacement_break+'</p>'
else:
@@ -584,10 +584,10 @@ class HeuristicProcessor(object):
#print "styles for this line are: "+str(styles)
split_styles = []
for style in styles:
#print "style is: "+str(style)
newstyle = style.split(':')
#print "newstyle is: "+str(newstyle)
split_styles.append(newstyle)
#print "style is: "+str(style)
newstyle = style.split(':')
#print "newstyle is: "+str(newstyle)
split_styles.append(newstyle)
styles = split_styles
for style, setting in styles:
if style == 'text-align' and setting != 'left':
@@ -764,6 +764,7 @@ class HeuristicProcessor(object):
# Multiple sequential blank paragraphs are merged with appropriate margins
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
if getattr(self.extra_opts, 'format_scene_breaks', False):
html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
html = self.detect_whitespace(html)
html = self.detect_soft_breaks(html)
blanks_count = len(self.any_multi_blank.findall(html))

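The rewritten ITALICIZE_STYLE_PATS are unicode patterns whose lookbehind also accepts straight and curly opening quotes, so ad-hoc italics markup is caught at the start of quoted speech. For example, with the first pattern above (hypothetical input)::

    import re
    pat = ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_'
    re.sub(pat, lambda m: '<i>%s</i>' % m.group('words'), u'“_Ahem_”')
    # -> u'“<i>Ahem</i>”'
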
@@ -175,18 +175,18 @@ class EPUBInput(InputFormatPlugin):
raise ValueError(
'EPUB files with DTBook markup are not supported')

not_for_spine = set()
for y in opf.itermanifest():
id_ = y.get('id', None)
if id_ and y.get('media-type', None) in \
('application/vnd.adobe-page-template+xml',):
not_for_spine.add(id_)

for x in list(opf.iterspine()):
ref = x.get('idref', None)
if ref is None:
if ref is None or ref in not_for_spine:
x.getparent().remove(x)
continue
for y in opf.itermanifest():
if y.get('id', None) == ref and y.get('media-type', None) in \
('application/vnd.adobe-page-template+xml',):
p = x.getparent()
if p is not None:
p.remove(x)
break

with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())

@@ -10,7 +10,6 @@ Transform OEB content into FB2 markup

from base64 import b64encode
from datetime import datetime
from mimetypes import types_map
import re
import uuid

@@ -18,9 +17,6 @@ from lxml import etree

from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF
from calibre.utils.magick import Image

class FB2MLizer(object):
@@ -71,7 +67,7 @@ class FB2MLizer(object):
return u'<?xml version="1.0" encoding="UTF-8"?>' + output

def clean_text(self, text):
# Condense empty paragraphs into a line break.
# Condense empty paragraphs into a line break.
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line />', text)
# Remove empty paragraphs.
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
@@ -100,6 +96,7 @@ class FB2MLizer(object):
return text

def fb2_header(self):
from calibre.ebooks.oeb.base import OPF
metadata = {}
metadata['title'] = self.oeb_book.metadata.title[0].value
metadata['appname'] = __appname__
@@ -180,6 +177,8 @@ class FB2MLizer(object):
return u'</FictionBook>'

def get_cover(self):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

cover_href = None

# Get the raster cover if it's available.
@@ -213,6 +212,8 @@ class FB2MLizer(object):
return u''

def get_text(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
text = ['<body>']

# Create main section if there are no others to create
@@ -248,6 +249,8 @@ class FB2MLizer(object):
'''
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
'''
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

images = []
for item in self.oeb_book.manifest:
# Don't write the image if it's not referenced in the document's text.
@@ -255,7 +258,7 @@ class FB2MLizer(object):
continue
if item.media_type in OEB_RASTER_IMAGES:
try:
if not item.media_type == types_map['.jpeg'] or not item.media_type == types_map['.jpg']:
if item.media_type != 'image/jpeg':
im = Image()
im.load(item.data)
im.set_compression_quality(70)
@@ -344,6 +347,8 @@ class FB2MLizer(object):

@return: List of string representing the XHTML converted to FB2 markup.
'''
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace

# Ensure what we are converting is not a string and that the fist tag is part of the XHTML namespace.
if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
return []

@@ -309,13 +309,14 @@ class HTMLInput(InputFormatPlugin):

def create_oebbook(self, htmlpath, basedir, opts, log, mi):
from calibre.ebooks.conversion.plumber import create_oebbook
from calibre.ebooks.oeb.base import DirContainer, \
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \
xpath
from calibre.ebooks.oeb.base import (DirContainer,
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
xpath)
from calibre import guess_type
from calibre.ebooks.oeb.transforms.metadata import \
meta_info_to_oeb_metadata
import cssutils
import cssutils, logging
cssutils.log.setLevel(logging.WARN)
self.OEB_STYLES = OEB_STYLES
oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False)
@@ -344,7 +345,8 @@ class HTMLInput(InputFormatPlugin):
htmlfile_map = {}
for f in filelist:
path = f.path
oeb.container = DirContainer(os.path.dirname(path), log)
oeb.container = DirContainer(os.path.dirname(path), log,
ignore_opf=True)
bname = os.path.basename(path)
id, href = oeb.manifest.generate(id='html',
href=ascii_filename(bname))
@@ -368,7 +370,7 @@ class HTMLInput(InputFormatPlugin):
for f in filelist:
path = f.path
dpath = os.path.dirname(path)
oeb.container = DirContainer(dpath, log)
oeb.container = DirContainer(dpath, log, ignore_opf=True)
item = oeb.manifest.hrefs[htmlfile_map[path]]
rewrite_links(item.data, partial(self.resource_adder, base=dpath))

@@ -408,7 +410,7 @@ class HTMLInput(InputFormatPlugin):
if not item.linear: continue
toc.add(title, item.href)

oeb.container = DirContainer(os.getcwdu(), oeb.log)
oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
return oeb

def link_to_local_path(self, link_, base=None):
@@ -455,7 +457,7 @@ class HTMLInput(InputFormatPlugin):
href=bhref)
self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log)
self.oeb.log, ignore_opf=True)
# Load into memory
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME

@@ -4,7 +4,6 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'


from calibre.ebooks.oeb.base import namespace, barename, DC11_NS

class EasyMeta(object):

@@ -12,6 +11,7 @@ class EasyMeta(object):
self.meta = meta

def __iter__(self):
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
meta = self.meta
for item_name in meta.items:
for item in meta[item_name]:

@@ -12,7 +12,6 @@ from os.path import dirname, abspath, relpath, exists, basename
from lxml import etree
from templite import Templite

from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
@@ -51,6 +50,7 @@ class HTMLOutput(OutputFormatPlugin):
'''
Generate table of contents
'''
from calibre.ebooks.oeb.base import element
with CurrentDir(output_dir):
def build_node(current_node, parent=None):
if parent is None:

@@ -10,6 +10,7 @@ import os

from calibre import walk
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import ZipFile

class HTMLZInput(InputFormatPlugin):
@@ -34,6 +35,13 @@ class HTMLZInput(InputFormatPlugin):
html = tf.read()
break

# Encoding
if options.input_encoding:
ienc = options.input_encoding
else:
ienc = xml_to_unicode(html[:4096])[-1]
html = html.decode(ienc, 'replace')

# Run the HTML through the html processing plugin.
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')

@@ -12,7 +12,6 @@ from lxml import etree

from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile

@@ -42,6 +41,8 @@ class HTMLZOutput(OutputFormatPlugin):
])

def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME

# HTML
if opts.htmlz_css_type == 'inline':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
@@ -71,9 +72,13 @@ class HTMLZOutput(OutputFormatPlugin):
os.makedirs(os.path.join(tdir, 'images'))
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images:
if item.media_type == SVG_MIME:
data = unicode(etree.tostring(item.data, encoding=unicode))
else:
data = item.data
fname = os.path.join(tdir, 'images', images[item.href])
with open(fname, 'wb') as img:
img.write(item.data)
img.write(data)

# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:

@@ -6,8 +6,8 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, textwrap, sys
from copy import deepcopy
import os, textwrap, sys, operator
from copy import deepcopy, copy

from lxml import etree

@@ -149,9 +149,65 @@ class TextBlock(etree.XSLTExtension):
self.root = root
self.parent = root
self.add_text_to = (self.parent, 'text')
self.fix_deep_nesting(node)
for child in node:
self.process_child(child)

def fix_deep_nesting(self, node):
deepest = 1

def depth(node):
parent = node.getparent()
ans = 1
while parent is not None:
ans += 1
parent = parent.getparent()
return ans

for span in node.xpath('descendant::Span'):
d = depth(span)
if d > deepest:
deepest = d
if d > 500:
break

if deepest < 500:
return

self.log.warn('Found deeply nested spans. Flattening.')
#with open('/t/before.xml', 'wb') as f:
# f.write(etree.tostring(node, method='xml'))

spans = [(depth(span), span) for span in node.xpath('descendant::Span')]
spans.sort(key=operator.itemgetter(0), reverse=True)

for depth, span in spans:
if depth < 3:
continue
p = span.getparent()
gp = p.getparent()
idx = p.index(span)
pidx = gp.index(p)
children = list(p)[idx:]
t = children[-1].tail
t = t if t else ''
children[-1].tail = t + (p.tail if p.tail else '')
p.tail = ''
pattrib = dict(**p.attrib) if p.tag == 'Span' else {}
for child in children:
p.remove(child)
if pattrib and child.tag == "Span":
attrib = copy(pattrib)
attrib.update(child.attrib)
child.attrib.update(attrib)


for child in reversed(children):
gp.insert(pidx+1, child)

#with open('/t/after.xml', 'wb') as f:
# f.write(etree.tostring(node, method='xml'))

def add_text(self, text):
if text:
if getattr(self.add_text_to[0], self.add_text_to[1]) is None:

@@ -6,11 +6,11 @@ __docformat__ = 'restructuredtext en'
"""
Provides abstraction for metadata reading.writing from a variety of ebook formats.
"""
import os, mimetypes, sys, re
import os, sys, re
from urllib import unquote, quote
from urlparse import urlparse

from calibre import relpath
from calibre import relpath, guess_type

from calibre.utils.config import tweaks

@@ -118,7 +118,7 @@ class Resource(object):
self.path = None
self.fragment = ''
try:
self.mime_type = mimetypes.guess_type(href_or_path)[0]
self.mime_type = guess_type(href_or_path)[0]
except:
self.mime_type = None
if self.mime_type is None:

@ -19,6 +19,9 @@ from calibre.utils.date import isoformat, format_date
from calibre.utils.icu import sort_key
from calibre.utils.formatter import TemplateFormatter

def human_readable(size, precision=2):
    """ Convert a size in bytes into megabytes """
    return ('%.'+str(precision)+'f'+ 'MB') % ((size/(1024.*1024.)),)
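A quick check of the helper above:

    print(human_readable(3 * 1024 * 1024))    # '3.00MB'
    print(human_readable(1536 * 1024, 1))     # '1.5MB'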

NULL_VALUES = {
        'user_metadata': {},
@ -65,7 +68,19 @@ composite_formatter = SafeFormat()
class Metadata(object):

    '''
    A class representing all the metadata for a book.
    A class representing all the metadata for a book. The various standard metadata
    fields are available as attributes of this object. You can also stick
    arbitrary attributes onto this object.

    Metadata from custom columns should be accessed via the get() method,
    passing in the lookup name for the column, for example: "#mytags".

    Use the :meth:`is_null` method to test if a field is null.

    This object also has functions to format fields into strings.

    The list of standard metadata fields, which grows with time, is in
    :data:`STANDARD_METADATA_FIELDS`.

    Please keep the method based API of this class to a minimum. Every method
    becomes a reserved field name.
@ -85,11 +100,19 @@ class Metadata(object):
        if title:
            self.title = title
        if authors:
            #: List of strings or []
            # List of strings or []
            self.author = list(authors) if authors else [] # Needed for backward compatibility
            self.authors = list(authors) if authors else []

    def is_null(self, field):
        '''
        Return True if the value of field is null in this object.
        'null' means it is unknown or evaluates to False. So a title of
        _('Unknown') is null, as is a language of 'und'.

        Be careful with numeric fields, since this will return True for zero
        as well as None.
        '''
        null_val = NULL_VALUES.get(field, None)
        val = getattr(self, field, None)
        return not val or val == null_val
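Two hedged examples of the semantics described in the docstring (assuming calibre's builtin _() translation function):

    mi = Metadata(_('Unknown'))
    print(mi.is_null('title'))    # True: _('Unknown') counts as null
    mi.rating = 0
    print(mi.is_null('rating'))   # True: zero is falsy, per the caveat above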
@ -117,7 +140,11 @@ class Metadata(object):
                    _('TEMPLATE ERROR'),
                    self).strip()
            return val

        if field.startswith('#') and field.endswith('_index'):
            try:
                return self.get_extra(field[:-6])
            except:
                pass
        raise AttributeError(
                'Metadata object has no attribute named: '+ repr(field))

@ -167,11 +194,6 @@ class Metadata(object):
        try:
            return self.__getattribute__(field)
        except AttributeError:
            if field.startswith('#') and field.endswith('_index'):
                try:
                    return self.get_extra(field[:-6])
                except:
                    pass
            return default

    def get_extra(self, field, default=None):

@ -483,7 +505,7 @@ class Metadata(object):
                    self_tags = self.get(x, [])
                    self.set_user_metadata(x, meta) # get... did the deepcopy
                    other_tags = other.get(x, [])
                    if meta['is_multiple']:
                    if meta['datatype'] == 'text' and meta['is_multiple']:
                        # Case-insensitive but case preserving merging
                        lotags = [t.lower() for t in other_tags]
                        lstags = [t.lower() for t in self_tags]
@ -541,17 +563,25 @@ class Metadata(object):
    def format_tags(self):
        return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])

    def format_rating(self):
        return unicode(self.rating)
    def format_rating(self, v=None, divide_by=1.0):
        if v is None:
            if self.rating is not None:
                return unicode(self.rating/divide_by)
            return u'None'
        return unicode(v/divide_by)
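For example, with ratings stored on a 0-10 scale (as the res/2.0 conversions elsewhere in this diff suggest; values invented):

    mi.rating = 8
    print(mi.format_rating(divide_by=2.0))        # u'4.0' -- four stars
    print(mi.format_rating(v=10, divide_by=2.0))  # u'5.0'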

    def format_field(self, key, series_with_index=True):
        '''
        Returns the tuple (display_name, formatted_value)
        '''
        name, val, ign, ign = self.format_field_extended(key, series_with_index)
        return (name, val)

    def format_field_extended(self, key, series_with_index=True):
        from calibre.ebooks.metadata import authors_to_string
        '''
        returns the tuple (field_name, formatted_value)
        returns the tuple (display_name, formatted_value, original_value,
        field_metadata)
        '''

        # Handle custom series index
@ -592,7 +622,7 @@ class Metadata(object):
            elif datatype == 'bool':
                res = _('Yes') if res else _('No')
            elif datatype == 'rating':
                res = res/2
                res = res/2.0
            return (name, unicode(res), orig_res, cmeta)

        # convert top-level ids into their value
@ -625,11 +655,19 @@ class Metadata(object):
                res = res + ' [%s]'%self.format_series_index()
            elif datatype == 'datetime':
                res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
            elif datatype == 'rating':
                res = res/2.0
            elif key == 'size':
                res = human_readable(res)
            return (name, unicode(res), orig_res, fmeta)

        return (None, None, None, None)

    def __unicode__(self):
        '''
        A string representation of this object, suitable for printing to
        console
        '''
        from calibre.ebooks.metadata import authors_to_string
        ans = []
        def fmt(x, y):
@ -673,6 +711,9 @@ class Metadata(object):
        return u'\n'.join(ans)

    def to_html(self):
        '''
        An HTML representation of this object.
        '''
        from calibre.ebooks.metadata import authors_to_string
        ans = [(_('Title'), unicode(self.title))]
        ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
@ -8,12 +8,13 @@ Read meta information from extZ (TXTZ, HTMLZ...) files.
'''

import os
import posixpath

from cStringIO import StringIO

from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile, safe_replace

def get_metadata(stream, extract_cover=True):
@ -23,16 +24,75 @@ def get_metadata(stream, extract_cover=True):
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    with TemporaryDirectory('_untxtz_mdata') as tdir:
        try:
            zf = ZipFile(stream)
            zf.extract('metadata.opf', tdir)
            with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff:
                mi = OPF(opff).to_book_metadata()
        except:
            return mi
    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_name = opf.raster_cover
                if cover_name:
                    mi.cover_data = ('jpg', zf.read(cover_name))
    except:
        return mi
    return mi

def set_metadata(stream, mi):
    opf = StringIO(metadata_to_opf(mi))
    safe_replace(stream, 'metadata.opf', opf)
    replacements = {}

    # Get the OPF in the archive.
    with ZipFile(stream) as zf:
        opf_path = get_first_opf_name(zf)
        opf_stream = StringIO(zf.read(opf_path))
    opf = OPF(opf_stream)

    # Cover.
    new_cdata = None
    try:
        new_cdata = mi.cover_data[1]
        if not new_cdata:
            raise Exception('no cover')
    except:
        try:
            new_cdata = open(mi.cover, 'rb').read()
        except:
            pass
    if new_cdata:
        raster_cover = opf.raster_cover
        if not raster_cover:
            raster_cover = 'cover.jpg'
        cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
        new_cover = _write_new_cover(new_cdata, cpath)
        replacements[cpath] = open(new_cover.name, 'rb')

    # Update the metadata.
    opf.smart_update(mi, replace_metadata=True)
    newopf = StringIO(opf.render())
    safe_replace(stream, opf_path, newopf, extra_replacements=replacements)

    # Cleanup temporary files.
    try:
        if cpath is not None:
            replacements[cpath].close()
            os.remove(replacements[cpath].name)
    except:
        pass

def get_first_opf_name(zf):
    names = zf.namelist()
    opfs = []
    for n in names:
        if n.endswith('.opf') and '/' not in n:
            opfs.append(n)
    if not opfs:
        raise Exception('No OPF found')
    opfs.sort()
    return opfs[0]

def _write_new_cover(new_cdata, cpath):
    from calibre.utils.magick.draw import save_cover_data_to
    new_cover = PersistentTemporaryFile(suffix=os.path.splitext(cpath)[1])
    new_cover.close()
    save_cover_data_to(new_cdata, new_cover.name)
    return new_cover
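get_first_opf_name() above picks the alphabetically first root-level OPF in the archive. A usage sketch (the archive name is invented):

    from calibre.utils.zipfile import ZipFile

    with open('book.htmlz', 'rb') as f:
        zf = ZipFile(f)
        print(get_first_opf_name(zf))   # e.g. 'metadata.opf'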
@ -5,11 +5,12 @@ __copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
|
||||
|
||||
'''Read meta information from fb2 files'''
|
||||
|
||||
import mimetypes, os
|
||||
import os
|
||||
from base64 import b64decode
|
||||
from lxml import etree
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre import guess_all_extensions
|
||||
|
||||
XLINK_NS = 'http://www.w3.org/1999/xlink'
|
||||
def XLINK(name):
|
||||
@ -71,7 +72,7 @@ def get_metadata(stream):
|
||||
binary = XPath('//fb2:binary[@id="%s"]'%id)(root)
|
||||
if binary:
|
||||
mt = binary[0].get('content-type', 'image/jpeg')
|
||||
exts = mimetypes.guess_all_extensions(mt)
|
||||
exts = guess_all_extensions(mt)
|
||||
if not exts:
|
||||
exts = ['.jpg']
|
||||
cdata = (exts[0][1:], b64decode(tostring(binary[0])))
|
||||
|
@ -259,6 +259,7 @@ class MetadataUpdater(object):
|
||||
trail = len(new_record0.getvalue()) % 4
|
||||
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
|
||||
new_record0.write(pad)
|
||||
new_record0.write('\0'*(1024*8))
|
||||
|
||||
# Rebuild the stream, update the pdbrecords pointers
|
||||
self.patchSection(0,new_record0.getvalue())
|
||||
@ -399,7 +400,8 @@ class MetadataUpdater(object):
|
||||
if getattr(self, 'exth', None) is None:
|
||||
raise MobiError('No existing EXTH record. Cannot update metadata.')
|
||||
|
||||
self.record0[92:96] = iana2mobi(mi.language)
|
||||
if not mi.is_null('language'):
|
||||
self.record0[92:96] = iana2mobi(mi.language)
|
||||
self.create_exth(exth=exth, new_title=mi.title)
|
||||
|
||||
# Fetch updated timestamp, cover_record, thumbnail_record
|
||||
|
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
|
||||
lxml based OPF parser.
|
||||
'''
|
||||
|
||||
import re, sys, unittest, functools, os, mimetypes, uuid, glob, cStringIO, json
|
||||
import re, sys, unittest, functools, os, uuid, glob, cStringIO, json
|
||||
from urllib import unquote
|
||||
from urlparse import urlparse
|
||||
|
||||
@ -20,7 +20,7 @@ from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_is
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.utils.date import parse_date, isoformat
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre import prints
|
||||
from calibre import prints, guess_type
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
|
||||
class Resource(object): # {{{
|
||||
@ -42,7 +42,7 @@ class Resource(object): # {{{
|
||||
self.path = None
|
||||
self.fragment = ''
|
||||
try:
|
||||
self.mime_type = mimetypes.guess_type(href_or_path)[0]
|
||||
self.mime_type = guess_type(href_or_path)[0]
|
||||
except:
|
||||
self.mime_type = None
|
||||
if self.mime_type is None:
|
||||
@ -1000,7 +1000,7 @@ class OPF(object): # {{{
|
||||
for t in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
|
||||
for item in self.guide:
|
||||
if item.type.lower() == t:
|
||||
self.create_manifest_item(item.href(), mimetypes.guess_type(path)[0])
|
||||
self.create_manifest_item(item.href(), guess_type(path)[0])
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
Read meta information from eReader pdb files.
|
||||
Read meta information from pdb files.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
@ -13,10 +13,12 @@ import re
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.metadata.ereader import get_metadata as get_eReader
|
||||
from calibre.ebooks.metadata.plucker import get_metadata as get_plucker
|
||||
|
||||
MREADER = {
|
||||
'PNPdPPrs' : get_eReader,
|
||||
'PNRdPPrs' : get_eReader,
|
||||
'DataPlkr' : get_plucker,
|
||||
}
|
||||
|
||||
from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
|
||||
|
src/calibre/ebooks/metadata/plucker.py (new file, 73 lines)
@ -0,0 +1,73 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||
|
||||
'''
|
||||
Read meta information from Plucker pdb files.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import struct
|
||||
from datetime import datetime
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.pdb.plucker.reader import SectionHeader, DATATYPE_METADATA, \
|
||||
MIBNUM_TO_NAME
|
||||
|
||||
def get_metadata(stream, extract_cover=True):
|
||||
'''
|
||||
Return metadata as a L{MetaInfo} object
|
||||
'''
|
||||
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||
stream.seek(0)
|
||||
|
||||
pheader = PdbHeaderReader(stream)
|
||||
section_data = None
|
||||
for i in range(1, pheader.num_sections):
|
||||
raw_data = pheader.section_data(i)
|
||||
section_header = SectionHeader(raw_data)
|
||||
if section_header.type == DATATYPE_METADATA:
|
||||
section_data = raw_data[8:]
|
||||
break
|
||||
|
||||
if not section_data:
|
||||
return mi
|
||||
|
||||
default_encoding = 'latin-1'
|
||||
record_count, = struct.unpack('>H', section_data[0:2])
|
||||
adv = 0
|
||||
title = None
|
||||
author = None
|
||||
pubdate = 0
|
||||
for i in xrange(record_count):
|
||||
type, = struct.unpack('>H', section_data[2+adv:4+adv])
|
||||
length, = struct.unpack('>H', section_data[4+adv:6+adv])
|
||||
|
||||
# CharSet
|
||||
if type == 1:
|
||||
val, = struct.unpack('>H', section_data[6+adv:8+adv])
|
||||
default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
|
||||
# Author
|
||||
elif type == 4:
|
||||
author = section_data[6+adv+(2*length)]
|
||||
# Title
|
||||
elif type == 5:
|
||||
title = section_data[6+adv+(2*length)]
|
||||
# Publication Date
|
||||
elif type == 6:
|
||||
pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])
|
||||
|
||||
adv += 2*length
|
||||
|
||||
if title:
|
||||
mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
|
||||
if author:
|
||||
author = author.replace('\0', '').decode(default_encoding, 'replace')
|
||||
mi.author = author.split(',')
|
||||
mi.pubdate = datetime.fromtimestamp(pubdate)
|
||||
|
||||
return mi
|
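The metadata section parsed above is a flat run of big-endian (type, length) records, with length counted in 2-byte words. A tiny synthetic record, for illustration only:

    import struct

    # record_count, then one record: type=6 (publication date), length=2 words
    section_data = struct.pack('>HHH', 1, 6, 2) + struct.pack('>I', 1303430400)
    typ, = struct.unpack('>H', section_data[2:4])
    length, = struct.unpack('>H', section_data[4:6])
    pubdate, = struct.unpack('>I', section_data[6:6 + 4])
    print(typ, length, pubdate)   # 6 2 1303430400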
@ -301,7 +301,7 @@ class Amazon(Source):
|
||||
if asin is None:
|
||||
asin = identifiers.get('asin', None)
|
||||
if asin:
|
||||
return 'http://amzn.com/%s'%asin
|
||||
return ('amazon', asin, 'http://amzn.com/%s'%asin)
|
||||
# }}}
|
||||
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||
|
@ -24,6 +24,8 @@ msprefs.defaults['ignore_fields'] = []
|
||||
msprefs.defaults['max_tags'] = 20
|
||||
msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
|
||||
msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
|
||||
msprefs.defaults['swap_author_names'] = False
|
||||
msprefs.defaults['fewer_tags'] = True
|
||||
|
||||
# Google covers are often poor quality (scans/errors) but they have high
|
||||
# resolution, so they trump covers from better sources. So make sure they
|
||||
@ -54,7 +56,8 @@ class InternalMetadataCompareKeyGen(object):
|
||||
|
||||
'''
|
||||
Generate a sort key for comparison of the relevance of Metadata objects,
|
||||
given a search query.
|
||||
given a search query. This is used only to compare results from the same
|
||||
metadata source, not across different sources.
|
||||
|
||||
The sort key ensures that an ascending order sort is a sort by order of
|
||||
decreasing relevance.
|
||||
@ -181,6 +184,10 @@ class Source(Plugin):
|
||||
#: construct the configuration widget for this plugin
|
||||
options = ()
|
||||
|
||||
#: A string that is displayed at the top of the config widget for this
|
||||
#: plugin
|
||||
config_help_message = None
|
||||
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
Plugin.__init__(self, *args, **kwargs)
|
||||
@ -269,32 +276,59 @@ class Source(Plugin):

        if authors:
            # Leave ' in there for Irish names
            pat = re.compile(r'[-,:;+!@#$%^&*(){}.`~"\s\[\]/]')
            remove_pat = re.compile(r'[,!@#$%^&*(){}`~"\s\[\]/]')
            replace_pat = re.compile(r'[-+.:;]')
            if only_first_author:
                authors = authors[:1]
            for au in authors:
                au = replace_pat.sub(' ', au)
                parts = au.split()
                if ',' in au:
                    # au probably in ln, fn form
                    parts = parts[1:] + parts[:1]
                for tok in parts:
                    tok = pat.sub('', tok).strip()
                    tok = remove_pat.sub('', tok).strip()
                    if len(tok) > 2 and tok.lower() not in ('von', ):
                        yield tok
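A quick trace of the tokenizer above on an invented lastname-first string (assuming a configured Source instance src):

    print(list(src.get_author_tokens(['Tolkien, J. R. R.'],
                                     only_first_author=True)))
    # -> ['Tolkien']: the comma triggers the ln,fn rotation, punctuation is
    #    stripped, and tokens shorter than three characters are dropped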


    def get_title_tokens(self, title):
    def get_title_tokens(self, title, strip_joiners=True, strip_subtitle=False):
        '''
        Take a title and return a list of tokens useful for an AND search query.
        Excludes connectives and punctuation.
        Excludes connectives (optionally) and punctuation.
        '''
        if title:
            pat = re.compile(r'''[-,:;+!@#$%^&*(){}.`~"'\s\[\]/]''')
            title = pat.sub(' ', title)
            # strip sub-titles
            if strip_subtitle:
                subtitle = re.compile(r'([\(\[\{].*?[\)\]\}]|[/:\\].*$)')
                if len(subtitle.sub('', title)) > 1:
                    title = subtitle.sub('', title)

            title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
            [
                # Remove things like: (2010) (Omnibus) etc.
                (r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
                # Remove any strings that contain the substring edition inside
                # parentheses
                (r'(?i)[({\[].*?(edition|ed.).*?[\]})]', ''),
                # Remove commas used as separators in numbers
                (r'(\d+),(\d+)', r'\1\2'),
                # Remove hyphens only if they have whitespace before them
                (r'(\s-)', ' '),
                # Remove single quotes not followed by 's'
                (r"'(?!s)", ''),
                # Replace other special chars with a space
                (r'''[:,;+!@#$%^&*(){}.`~"\s\[\]/]''', ' ')
            ]]

            for pat, repl in title_patterns:
                title = pat.sub(repl, title)

            tokens = title.split()
            for token in tokens:
                token = token.strip()
                if token and token.lower() not in ('a', 'and', 'the'):
                if token and (not strip_joiners or token.lower() not in ('a',
                    'and', 'the', '&')):
                    yield token
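And the title tokenizer, on an invented title:

    print(list(src.get_title_tokens('A Tale of Two Cities')))
    # -> ['Tale', 'of', 'Two', 'Cities']: with strip_joiners left True only
    #    'a', 'and', 'the' and '&' are removed; 'of' is kept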
    def split_jobs(self, jobs, num):
@ -341,8 +375,17 @@ class Source(Plugin):

    def get_book_url(self, identifiers):
        '''
        Return the URL for the book identified by identifiers at this source.
        If no URL is found, return None.
        Return a 3-tuple or None. The 3-tuple is of the form:
        (identifier_type, identifier_value, URL).
        The URL is the URL for the book identified by identifiers at this
        source. identifier_type, identifier_value specify the identifier
        corresponding to the URL.
        This URL must be browsable by a human using a browser. It is meant
        to provide a clickable link for the user to easily visit the book's
        page at this source.
        If no URL is found, return None. This method must be quick, and
        consistent, so only implement it if it is possible to construct the
        URL from a known scheme given identifiers.
        '''
        return None

@ -76,6 +76,11 @@ def run_download(log, results, abort,
|
||||
(plugin, width, height, fmt, bytes)
|
||||
|
||||
'''
|
||||
if title == _('Unknown'):
|
||||
title = None
|
||||
if authors == [_('Unknown')]:
|
||||
authors = None
|
||||
|
||||
plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]
|
||||
|
||||
rq = Queue()
|
||||
@ -145,7 +150,7 @@ def download_cover(log,
|
||||
Synchronous cover download. Returns the "best" cover as per user
|
||||
prefs/cover resolution.
|
||||
|
||||
Return cover is a tuple: (plugin, width, height, fmt, data)
|
||||
Returned cover is a tuple: (plugin, width, height, fmt, data)
|
||||
|
||||
Returns None if no cover is found.
|
||||
'''
|
||||
|
@ -173,7 +173,7 @@ class GoogleBooks(Source):
|
||||
def get_book_url(self, identifiers): # {{{
|
||||
goog = identifiers.get('google', None)
|
||||
if goog is not None:
|
||||
return 'http://books.google.com/books?id=%s'%goog
|
||||
return ('google', goog, 'http://books.google.com/books?id=%s'%goog)
|
||||
# }}}
|
||||
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
|
||||
|
@ -42,6 +42,10 @@ class Worker(Thread):
|
||||
self.log.exception('Plugin', self.plugin.name, 'failed')
|
||||
self.plugin.dl_time_spent = time.time() - start
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.plugin.name
|
||||
|
||||
def is_worker_alive(workers):
|
||||
for w in workers:
|
||||
if w.is_alive():
|
||||
@ -114,8 +118,12 @@ class ISBNMerge(object):
|
||||
|
||||
return self.results
|
||||
|
||||
def merge_metadata_results(self):
|
||||
' Merge results with identical title and authors '
|
||||
def merge_metadata_results(self, merge_on_identifiers=False):
|
||||
'''
|
||||
Merge results with identical title and authors or an identical
|
||||
identifier
|
||||
'''
|
||||
# First title/author
|
||||
groups = {}
|
||||
for result in self.results:
|
||||
title = lower(result.title if result.title else '')
|
||||
@ -135,6 +143,44 @@ class ISBNMerge(object):
|
||||
result = rgroup[0]
|
||||
self.results.append(result)
|
||||
|
||||
if merge_on_identifiers:
|
||||
# Now identifiers
|
||||
groups, empty = {}, []
|
||||
for result in self.results:
|
||||
key = set()
|
||||
for typ, val in result.identifiers.iteritems():
|
||||
if typ and val:
|
||||
key.add((typ, val))
|
||||
if key:
|
||||
key = frozenset(key)
|
||||
match = None
|
||||
for candidate in list(groups):
|
||||
if candidate.intersection(key):
|
||||
# We have at least one identifier in common
|
||||
match = candidate.union(key)
|
||||
results = groups.pop(candidate)
|
||||
results.append(result)
|
||||
groups[match] = results
|
||||
break
|
||||
if match is None:
|
||||
groups[key] = [result]
|
||||
else:
|
||||
empty.append(result)
|
||||
|
||||
if len(groups) != len(self.results):
|
||||
self.results = []
|
||||
for rgroup in groups.itervalues():
|
||||
rel = [r.average_source_relevance for r in rgroup]
|
||||
if len(rgroup) > 1:
|
||||
result = self.merge(rgroup, None, do_asr=False)
|
||||
result.average_source_relevance = sum(rel)/len(rel)
|
||||
elif rgroup:
|
||||
result = rgroup[0]
|
||||
self.results.append(result)
|
||||
|
||||
if empty:
|
||||
self.results.extend(empty)
|
||||
|
||||
self.results.sort(key=attrgetter('average_source_relevance'))
|
||||
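The identifier merge above unions key sets whenever two results share a (typ, val) pair. A toy trace of just that grouping logic (data invented):

    groups = {}
    keys = [frozenset({('isbn', '123')}),
            frozenset({('isbn', '123'), ('goodreads', '9')})]
    for key in keys:
        match = None
        for candidate in list(groups):
            if candidate.intersection(key):
                # at least one identifier in common: union the key sets
                match = candidate.union(key)
                groups[match] = groups.pop(candidate) + [key]
                break
        if match is None:
            groups[key] = [key]
    print(len(groups))  # 1 -- both results end up in a single group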
|
||||
def merge_isbn_results(self):
|
||||
@ -174,7 +220,7 @@ class ISBNMerge(object):
|
||||
|
||||
# We assume the smallest set of tags has the least cruft in it
|
||||
ans.tags = self.length_merge('tags', results,
|
||||
null_value=ans.tags)
|
||||
null_value=ans.tags, shortest=msprefs['fewer_tags'])
|
||||
|
||||
# We assume the longest series has the most info in it
|
||||
ans.series = self.length_merge('series', results,
|
||||
@ -253,6 +299,10 @@ def merge_identify_results(result_map, log):
|
||||
|
||||
def identify(log, abort, # {{{
|
||||
title=None, authors=None, identifiers={}, timeout=30):
|
||||
if title == _('Unknown'):
|
||||
title = None
|
||||
if authors == [_('Unknown')]:
|
||||
authors = None
|
||||
start_time = time.time()
|
||||
plugins = [p for p in metadata_plugins(['identify']) if p.is_configured()]
|
||||
|
||||
@ -302,7 +352,11 @@ def identify(log, abort, # {{{
|
||||
|
||||
if (first_result_at is not None and time.time() - first_result_at >
|
||||
wait_time):
|
||||
log('Not waiting any longer for more results')
|
||||
log.warn('Not waiting any longer for more results. Still running'
|
||||
' sources:')
|
||||
for worker in workers:
|
||||
if worker.is_alive():
|
||||
log.debug('\t' + worker.name)
|
||||
abort.set()
|
||||
break
|
||||
|
||||
@ -336,7 +390,11 @@ def identify(log, abort, # {{{
|
||||
log(plog)
|
||||
log('\n'+'*'*80)
|
||||
|
||||
dummy = Metadata(_('Unknown'))
|
||||
for i, result in enumerate(presults):
|
||||
for f in plugin.prefs['ignore_fields']:
|
||||
if ':' not in f:
|
||||
setattr(result, f, getattr(dummy, f))
|
||||
result.relevance_in_source = i
|
||||
result.has_cached_cover_url = (plugin.cached_cover_url_is_reliable
|
||||
and plugin.get_cached_cover_url(result.identifiers) is not
|
||||
@ -354,29 +412,53 @@ def identify(log, abort, # {{{
|
||||
|
||||
if msprefs['txt_comments']:
|
||||
for r in results:
|
||||
if r.plugin.has_html_comments and r.comments:
|
||||
if r.identify_plugin.has_html_comments and r.comments:
|
||||
r.comments = html2text(r.comments)
|
||||
|
||||
max_tags = msprefs['max_tags']
|
||||
for r in results:
|
||||
r.tags = r.tags[:max_tags]
|
||||
|
||||
if msprefs['swap_author_names']:
|
||||
for r in results:
|
||||
def swap_to_ln_fn(a):
|
||||
if ',' in a:
|
||||
return a
|
||||
parts = a.split(None)
|
||||
if len(parts) <= 1:
|
||||
return a
|
||||
surname = parts[-1]
|
||||
return '%s, %s' % (surname, ' '.join(parts[:-1]))
|
||||
r.authors = [swap_to_ln_fn(a) for a in r.authors]
|
||||
|
||||
return results
|
||||
# }}}
|
||||
|
||||
def urls_from_identifiers(identifiers): # {{{
    identifiers = dict([(k.lower(), v) for k, v in identifiers.iteritems()])
    ans = []
    for plugin in all_metadata_plugins():
        try:
            url = plugin.get_book_url(identifiers)
            if url is not None:
                ans.append((plugin.name, url))
            id_type, id_val, url = plugin.get_book_url(identifiers)
            ans.append((plugin.name, id_type, id_val, url))
        except:
            pass
    isbn = identifiers.get('isbn', None)
    if isbn:
        ans.append(('ISBN',
            'http://www.worldcat.org/search?q=bn%%3A%s&qt=advanced'%isbn))
        ans.append((isbn, 'isbn', isbn,
            'http://www.worldcat.org/isbn/'+isbn))
    doi = identifiers.get('doi', None)
    if doi:
        ans.append(('DOI', 'doi', doi,
            'http://dx.doi.org/'+doi))
    arxiv = identifiers.get('arxiv', None)
    if arxiv:
        ans.append(('arXiv', 'arxiv', arxiv,
            'http://arxiv.org/abs/'+arxiv))
    oclc = identifiers.get('oclc', None)
    if oclc:
        ans.append(('OCLC', 'oclc', oclc,
            'http://www.worldcat.org/oclc/'+oclc))
    return ans
# }}}
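For illustration, the shape of the returned list (entries contributed by configured plugins omitted; identifier values invented):

    print(urls_from_identifiers({'isbn': '9780307459671', 'oclc': '502419'}))
    # -> [('9780307459671', 'isbn', '9780307459671',
    #      'http://www.worldcat.org/isbn/9780307459671'),
    #     ('OCLC', 'oclc', '502419',
    #      'http://www.worldcat.org/oclc/502419')]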
|
||||
@ -386,13 +468,18 @@ if __name__ == '__main__': # tests {{{
|
||||
from calibre.ebooks.metadata.sources.test import (test_identify,
|
||||
title_test, authors_test)
|
||||
tests = [
|
||||
(
|
||||
{'title':'Magykal Papers',
|
||||
'authors':['Sage']},
|
||||
[title_test('The Magykal Papers', exact=True)],
|
||||
),
|
||||
|
||||
|
||||
( # An e-book ISBN not on Amazon, one of the authors is
|
||||
# unknown to Amazon
|
||||
{'identifiers':{'isbn': '9780307459671'},
|
||||
'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
|
||||
[title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
|
||||
exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
|
||||
[title_test('The Invisible Gorilla', exact=True)]
|
||||
|
||||
),
|
||||
|
||||
@ -400,7 +487,7 @@ if __name__ == '__main__': # tests {{{
|
||||
{'title':'Learning Python',
|
||||
'authors':['Lutz']},
|
||||
[title_test('Learning Python',
|
||||
exact=True), authors_test(['Mark Lutz'])
|
||||
exact=True), authors_test(['Mark J. Lutz', 'David Ascher'])
|
||||
]
|
||||
|
||||
),
|
||||
|
@ -7,7 +7,19 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.metadata.sources.base import Source
|
||||
from urllib import quote
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source, Option
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.icu import lower
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
|
||||
BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&'
|
||||
|
||||
|
||||
class ISBNDB(Source):
|
||||
|
||||
@ -18,6 +30,20 @@ class ISBNDB(Source):
|
||||
touched_fields = frozenset(['title', 'authors',
|
||||
'identifier:isbn', 'comments', 'publisher'])
|
||||
supports_gzip_transfer_encoding = True
|
||||
# Shortcut, since we have no cached cover URLS
|
||||
cached_cover_url_is_reliable = False
|
||||
|
||||
    options = (
            Option('isbndb_key', 'string', None, _('IsbnDB key:'),
                _('To use isbndb.com you have to sign up for a free account '
                    'at isbndb.com and get an access key.')),
            )
|
||||
|
||||
config_help_message = '<p>'+_('To use metadata from isbndb.com you must sign'
|
||||
' up for a free account and get an isbndb key and enter it below.'
|
||||
' Instructions to get the key are '
|
||||
'<a href="http://isbndb.com/docs/api/30-keys.html">here</a>.')
|
||||
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
Source.__init__(self, *args, **kwargs)
|
||||
@ -35,9 +61,186 @@ class ISBNDB(Source):
|
||||
except:
|
||||
pass
|
||||
|
||||
self.isbndb_key = prefs['isbndb_key']
|
||||
@property
|
||||
def isbndb_key(self):
|
||||
return self.prefs['isbndb_key']
|
||||
|
||||
def is_configured(self):
|
||||
return self.isbndb_key is not None
|
||||
|
||||
def create_query(self, title=None, authors=None, identifiers={}): # {{{
|
||||
base_url = BASE_URL%self.isbndb_key
|
||||
isbn = check_isbn(identifiers.get('isbn', None))
|
||||
q = ''
|
||||
if isbn is not None:
|
||||
q = 'index1=isbn&value1='+isbn
|
||||
elif title or authors:
|
||||
tokens = []
|
||||
title_tokens = list(self.get_title_tokens(title))
|
||||
tokens += title_tokens
|
||||
author_tokens = self.get_author_tokens(authors,
|
||||
only_first_author=True)
|
||||
tokens += author_tokens
|
||||
tokens = [quote(t) for t in tokens]
|
||||
q = '+'.join(tokens)
|
||||
q = 'index1=combined&value1='+q
|
||||
|
||||
if not q:
|
||||
return None
|
||||
if isinstance(q, unicode):
|
||||
q = q.encode('utf-8')
|
||||
return base_url + q
|
||||
# }}}
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
if not self.is_configured():
|
||||
return
|
||||
query = self.create_query(title=title, authors=authors,
|
||||
identifiers=identifiers)
|
||||
if not query:
|
||||
err = 'Insufficient metadata to construct query'
|
||||
log.error(err)
|
||||
return err
|
||||
|
||||
results = []
|
||||
try:
|
||||
results = self.make_query(query, abort, title=title, authors=authors,
|
||||
identifiers=identifiers, timeout=timeout)
|
||||
except:
|
||||
err = 'Failed to make query to ISBNDb, aborting.'
|
||||
log.exception(err)
|
||||
return err
|
||||
|
||||
if not results and identifiers.get('isbn', False) and title and authors and \
|
||||
not abort.is_set():
|
||||
return self.identify(log, result_queue, abort, title=title,
|
||||
authors=authors, timeout=timeout)
|
||||
|
||||
for result in results:
|
||||
self.clean_downloaded_metadata(result)
|
||||
result_queue.put(result)
|
||||
|
||||
def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
|
||||
|
||||
def tostring(x):
|
||||
if x is None:
|
||||
return ''
|
||||
return etree.tostring(x, method='text', encoding=unicode).strip()
|
||||
|
||||
orig_isbn = identifiers.get('isbn', None)
|
||||
title_tokens = list(self.get_title_tokens(orig_title))
|
||||
author_tokens = list(self.get_author_tokens(orig_authors))
|
||||
results = []
|
||||
|
||||
def ismatch(title, authors):
|
||||
authors = lower(' '.join(authors))
|
||||
title = lower(title)
|
||||
match = not title_tokens
|
||||
for t in title_tokens:
|
||||
if lower(t) in title:
|
||||
match = True
|
||||
break
|
||||
amatch = not author_tokens
|
||||
for a in author_tokens:
|
||||
if lower(a) in authors:
|
||||
amatch = True
|
||||
break
|
||||
if not author_tokens: amatch = True
|
||||
return match and amatch
|
||||
|
||||
bl = feed.find('BookList')
|
||||
if bl is None:
|
||||
err = tostring(etree.find('errormessage'))
|
||||
raise ValueError('ISBNDb query failed:' + err)
|
||||
total_results = int(bl.get('total_results'))
|
||||
shown_results = int(bl.get('shown_results'))
|
||||
for bd in bl.xpath('.//BookData'):
|
||||
isbn = check_isbn(bd.get('isbn13', bd.get('isbn', None)))
|
||||
if not isbn:
|
||||
continue
|
||||
if orig_isbn and isbn != orig_isbn:
|
||||
continue
|
||||
title = tostring(bd.find('Title'))
|
||||
if not title:
|
||||
continue
|
||||
authors = []
|
||||
for au in bd.xpath('.//Authors/Person'):
|
||||
au = tostring(au)
|
||||
if au:
|
||||
if ',' in au:
|
||||
ln, _, fn = au.partition(',')
|
||||
au = fn.strip() + ' ' + ln.strip()
|
||||
authors.append(au)
|
||||
if not authors:
|
||||
continue
|
||||
comments = tostring(bd.find('Summary'))
|
||||
if not comments:
|
||||
# Require comments, since without them the result is useless
|
||||
# anyway
|
||||
continue
|
||||
id_ = (title, tuple(authors))
|
||||
if id_ in seen:
|
||||
continue
|
||||
seen.add(id_)
|
||||
if not ismatch(title, authors):
|
||||
continue
|
||||
publisher = tostring(bd.find('PublisherText'))
|
||||
if not publisher: publisher = None
|
||||
if publisher and 'audio' in publisher.lower():
|
||||
continue
|
||||
mi = Metadata(title, authors)
|
||||
mi.isbn = isbn
|
||||
mi.publisher = publisher
|
||||
mi.comments = comments
|
||||
results.append(mi)
|
||||
return total_results, shown_results, results
|
||||
|
||||
def make_query(self, q, abort, title=None, authors=None, identifiers={},
|
||||
max_pages=10, timeout=30):
|
||||
page_num = 1
|
||||
parser = etree.XMLParser(recover=True, no_network=True)
|
||||
br = self.browser
|
||||
|
||||
seen = set()
|
||||
|
||||
candidates = []
|
||||
total_found = 0
|
||||
while page_num <= max_pages and not abort.is_set():
|
||||
url = q.replace('&page_number=1&', '&page_number=%d&'%page_num)
|
||||
page_num += 1
|
||||
raw = br.open_novisit(url, timeout=timeout).read()
|
||||
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
|
||||
strip_encoding_pats=True)[0], parser=parser)
|
||||
total, found, results = self.parse_feed(
|
||||
feed, seen, title, authors, identifiers)
|
||||
total_found += found
|
||||
candidates += results
|
||||
if total_found >= total or len(candidates) > 9:
|
||||
break
|
||||
|
||||
return candidates
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
# To run these test use:
|
||||
# calibre-debug -e src/calibre/ebooks/metadata/sources/isbndb.py
|
||||
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
||||
title_test, authors_test)
|
||||
test_identify_plugin(ISBNDB.name,
|
||||
[
|
||||
|
||||
|
||||
(
|
||||
{'title':'Great Gatsby',
|
||||
'authors':['Fitzgerald']},
|
||||
[title_test('The great gatsby', exact=True),
|
||||
authors_test(['F. Scott Fitzgerald'])]
|
||||
),
|
||||
|
||||
(
|
||||
{'title': 'Flatland', 'authors':['Abbott']},
|
||||
[title_test('Flatland', exact=False)]
|
||||
),
|
||||
])
|
||||
|
||||
|
src/calibre/ebooks/metadata/sources/overdrive.py (new executable file, 468 lines)
@ -0,0 +1,468 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Fetch metadata using Overdrive Content Reserve
|
||||
'''
|
||||
import re, random, mechanize, copy, json
|
||||
from threading import RLock
|
||||
from Queue import Queue, Empty
|
||||
|
||||
from lxml import html
|
||||
from lxml.html import soupparser
|
||||
|
||||
from calibre.ebooks.metadata import check_isbn
|
||||
from calibre.ebooks.metadata.sources.base import Source, Option
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.library.comments import sanitize_comments_html
|
||||
|
||||
ovrdrv_data_cache = {}
|
||||
cache_lock = RLock()
|
||||
base_url = 'http://search.overdrive.com/'
|
||||
|
||||
|
||||
class OverDrive(Source):
|
||||
|
||||
name = 'Overdrive'
|
||||
description = _('Downloads metadata from Overdrive\'s Content Reserve')
|
||||
|
||||
capabilities = frozenset(['identify', 'cover'])
|
||||
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
|
||||
'comments', 'publisher', 'identifier:isbn', 'series', 'series_index',
|
||||
'language', 'identifier:overdrive'])
|
||||
has_html_comments = True
|
||||
supports_gzip_transfer_encoding = False
|
||||
cached_cover_url_is_reliable = True
|
||||
|
||||
options = (
|
||||
Option('get_full_metadata', 'bool', False,
|
||||
_('Download all metadata (slow)'),
|
||||
_('Enable this option to gather all metadata available from Overdrive.')),
|
||||
)
|
||||
|
||||
config_help_message = '<p>'+_('Additional metadata can be taken from Overdrive\'s book detail'
|
||||
' page. This includes a limited set of tags used by libraries, comments, language,'
|
||||
' and the ebook ISBN. Collecting this data is disabled by default due to the extra'
|
||||
' time required. Check the download all metadata option below to'
|
||||
' enable downloading this data.')
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
ovrdrv_id = identifiers.get('overdrive', None)
|
||||
isbn = identifiers.get('isbn', None)
|
||||
|
||||
br = self.browser
|
||||
ovrdrv_data = self.to_ovrdrv_data(br, log, title, authors, ovrdrv_id)
|
||||
if ovrdrv_data:
|
||||
title = ovrdrv_data[8]
|
||||
authors = ovrdrv_data[6]
|
||||
mi = Metadata(title, authors)
|
||||
self.parse_search_results(ovrdrv_data, mi)
|
||||
if ovrdrv_id is None:
|
||||
ovrdrv_id = ovrdrv_data[7]
|
||||
|
||||
if self.prefs['get_full_metadata']:
|
||||
self.get_book_detail(br, ovrdrv_data[1], mi, ovrdrv_id, log)
|
||||
|
||||
if isbn is not None:
|
||||
self.cache_isbn_to_identifier(isbn, ovrdrv_id)
|
||||
|
||||
result_queue.put(mi)
|
||||
|
||||
return None
|
||||
# }}}
|
||||
|
||||
def download_cover(self, log, result_queue, abort, # {{{
|
||||
title=None, authors=None, identifiers={}, timeout=30):
|
||||
cached_url = self.get_cached_cover_url(identifiers)
|
||||
if cached_url is None:
|
||||
log.info('No cached cover found, running identify')
|
||||
rq = Queue()
|
||||
self.identify(log, rq, abort, title=title, authors=authors,
|
||||
identifiers=identifiers)
|
||||
if abort.is_set():
|
||||
return
|
||||
results = []
|
||||
while True:
|
||||
try:
|
||||
results.append(rq.get_nowait())
|
||||
except Empty:
|
||||
break
|
||||
results.sort(key=self.identify_results_keygen(
|
||||
title=title, authors=authors, identifiers=identifiers))
|
||||
for mi in results:
|
||||
cached_url = self.get_cached_cover_url(mi.identifiers)
|
||||
if cached_url is not None:
|
||||
break
|
||||
if cached_url is None:
|
||||
log.info('No cover found')
|
||||
return
|
||||
|
||||
if abort.is_set():
|
||||
return
|
||||
|
||||
ovrdrv_id = identifiers.get('overdrive', None)
|
||||
br = self.browser
|
||||
req = mechanize.Request(cached_url)
|
||||
if ovrdrv_id is not None:
|
||||
referer = self.get_base_referer()+'ContentDetails-Cover.htm?ID='+ovrdrv_id
|
||||
req.add_header('referer', referer)
|
||||
|
||||
log('Downloading cover from:', cached_url)
|
||||
try:
|
||||
cdata = br.open_novisit(req, timeout=timeout).read()
|
||||
result_queue.put((self, cdata))
|
||||
except:
|
||||
log.exception('Failed to download cover from:', cached_url)
|
||||
# }}}
|
||||
|
||||
def get_cached_cover_url(self, identifiers): # {{{
|
||||
url = None
|
||||
ovrdrv_id = identifiers.get('overdrive', None)
|
||||
if ovrdrv_id is None:
|
||||
isbn = identifiers.get('isbn', None)
|
||||
if isbn is not None:
|
||||
ovrdrv_id = self.cached_isbn_to_identifier(isbn)
|
||||
if ovrdrv_id is not None:
|
||||
url = self.cached_identifier_to_cover_url(ovrdrv_id)
|
||||
|
||||
return url
|
||||
# }}}
|
||||
|
||||
def get_base_referer(self): # to be used for passing referrer headers to cover download
|
||||
choices = [
|
||||
'http://overdrive.chipublib.org/82DC601D-7DDE-4212-B43A-09D821935B01/10/375/en/',
|
||||
'http://emedia.clevnet.org/9D321DAD-EC0D-490D-BFD8-64AE2C96ECA8/10/241/en/',
|
||||
'http://singapore.lib.overdrive.com/F11D55BE-A917-4D63-8111-318E88B29740/10/382/en/',
|
||||
'http://ebooks.nypl.org/20E48048-A377-4520-BC43-F8729A42A424/10/257/en/',
|
||||
'http://spl.lib.overdrive.com/5875E082-4CB2-4689-9426-8509F354AFEF/10/335/en/'
|
||||
]
|
||||
return choices[random.randint(0, len(choices)-1)]
|
||||
|
||||
def format_results(self, reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid):
|
||||
fix_slashes = re.compile(r'\\/')
|
||||
thumbimage = fix_slashes.sub('/', thumbimage)
|
||||
worldcatlink = fix_slashes.sub('/', worldcatlink)
|
||||
cover_url = re.sub('(?P<img>(Ima?g(eType-)?))200', '\g<img>100', thumbimage)
|
||||
social_metadata_url = base_url+'TitleInfo.aspx?ReserveID='+reserveid+'&FormatID='+formatid
|
||||
series_num = ''
|
||||
if not series:
|
||||
if subtitle:
|
||||
title = od_title+': '+subtitle
|
||||
else:
|
||||
title = od_title
|
||||
else:
|
||||
title = od_title
|
||||
m = re.search("([0-9]+$)", subtitle)
|
||||
if m:
|
||||
series_num = float(m.group(1))
|
||||
return [cover_url, social_metadata_url, worldcatlink, series, series_num, publisher, creators, reserveid, title]
|
||||
|
||||
    def safe_query(self, br, query_url, post=''):
        '''
        The query must be initialized by loading an empty search results
        page. That page attempts to set a cookie that mechanize doesn't
        like, so copy the cookiejar to a separate instance and make a
        one-off request with the temporary cookiejar.
        '''
|
||||
goodcookies = br._ua_handlers['_cookies'].cookiejar
|
||||
clean_cj = mechanize.CookieJar()
|
||||
cookies_to_copy = []
|
||||
for cookie in goodcookies:
|
||||
copied_cookie = copy.deepcopy(cookie)
|
||||
cookies_to_copy.append(copied_cookie)
|
||||
for copied_cookie in cookies_to_copy:
|
||||
clean_cj.set_cookie(copied_cookie)
|
||||
|
||||
if post:
|
||||
br.open_novisit(query_url, post)
|
||||
else:
|
||||
br.open_novisit(query_url)
|
||||
|
||||
br.set_cookiejar(clean_cj)
|
||||
|
||||
def overdrive_search(self, br, log, q, title, author):
|
||||
# re-initialize the cookiejar so that it's clean
|
||||
clean_cj = mechanize.CookieJar()
|
||||
br.set_cookiejar(clean_cj)
|
||||
q_query = q+'default.aspx/SearchByKeyword'
|
||||
q_init_search = q+'SearchResults.aspx'
|
||||
# get first author as string - convert this to a proper cleanup function later
|
||||
author_tokens = list(self.get_author_tokens(author,
|
||||
only_first_author=True))
|
||||
title_tokens = list(self.get_title_tokens(title,
|
||||
strip_joiners=False, strip_subtitle=True))
|
||||
|
||||
if len(title_tokens) >= len(author_tokens):
|
||||
initial_q = ' '.join(title_tokens)
|
||||
xref_q = '+'.join(author_tokens)
|
||||
else:
|
||||
initial_q = ' '.join(author_tokens)
|
||||
xref_q = '+'.join(title_tokens)
|
||||
#log.error('Initial query is %s'%initial_q)
|
||||
#log.error('Cross reference query is %s'%xref_q)
|
||||
|
||||
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
|
||||
query = '{"szKeyword":"'+initial_q+'"}'
|
||||
|
||||
# main query, requires specific Content Type header
|
||||
req = mechanize.Request(q_query)
|
||||
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
||||
br.open_novisit(req, query)
|
||||
|
||||
# initiate the search without messing up the cookiejar
|
||||
self.safe_query(br, q_init_search)
|
||||
|
||||
# get the search results object
|
||||
results = False
|
||||
while results == False:
|
||||
xreq = mechanize.Request(q_xref)
|
||||
xreq.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
xreq.add_header('Referer', q_init_search)
|
||||
xreq.add_header('Accept', 'application/json, text/javascript, */*')
|
||||
raw = br.open_novisit(xreq).read()
|
||||
for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
|
||||
if int(m.group('displayrecords')) >= 1:
|
||||
results = True
|
||||
elif int(m.group('totalrecords')) >= 1:
|
||||
if int(m.group('totalrecords')) >= 100:
|
||||
if xref_q.find('+') != -1:
|
||||
xref_tokens = xref_q.split('+')
|
||||
xref_q = xref_tokens[0]
|
||||
#log.error('xref_q is '+xref_q)
|
||||
else:
|
||||
xref_q = ''
|
||||
xref_q = ''
|
||||
q_xref = q+'SearchResults.svc/GetResults?iDisplayLength=50&sSearch='+xref_q
|
||||
elif int(m.group('totalrecords')) == 0:
|
||||
return ''
|
||||
|
||||
return self.sort_ovrdrv_results(raw, log, title, title_tokens, author, author_tokens)
|
||||
|
||||
|
||||
def sort_ovrdrv_results(self, raw, log, title=None, title_tokens=None, author=None, author_tokens=None, ovrdrv_id=None):
|
||||
close_matches = []
|
||||
raw = re.sub('.*?\[\[(?P<content>.*?)\]\].*', '[[\g<content>]]', raw)
|
||||
results = json.loads(raw)
|
||||
#log.error('raw results are:'+str(results))
|
||||
# The search results are either from a keyword search or a multi-format list from a single ID,
|
||||
# sort through the results for closest match/format
|
||||
if results:
|
||||
for reserveid, od_title, subtitle, edition, series, publisher, format, formatid, creators, \
|
||||
thumbimage, shortdescription, worldcatlink, excerptlink, creatorfile, sorttitle, \
|
||||
availabletolibrary, availabletoretailer, relevancyrank, unknown1, unknown2, unknown3 in results:
|
||||
#log.error("this record's title is "+od_title+", subtitle is "+subtitle+", author[s] are "+creators+", series is "+series)
|
||||
if ovrdrv_id is not None and int(formatid) in [1, 50, 410, 900]:
|
||||
#log.error('overdrive id is not None, searching based on format type priority')
|
||||
return self.format_results(reserveid, od_title, subtitle, series, publisher,
|
||||
creators, thumbimage, worldcatlink, formatid)
|
||||
else:
|
||||
if creators:
|
||||
creators = creators.split(', ')
|
||||
# if an exact match in a preferred format occurs
|
||||
if ((author and creators and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
|
||||
return self.format_results(reserveid, od_title, subtitle, series, publisher,
|
||||
creators, thumbimage, worldcatlink, formatid)
|
||||
else:
|
||||
close_title_match = False
|
||||
close_author_match = False
|
||||
for token in title_tokens:
|
||||
if od_title.lower().find(token.lower()) != -1:
|
||||
close_title_match = True
|
||||
else:
|
||||
close_title_match = False
|
||||
break
|
||||
for author in creators:
|
||||
for token in author_tokens:
|
||||
if author.lower().find(token.lower()) != -1:
|
||||
close_author_match = True
|
||||
else:
|
||||
close_author_match = False
|
||||
break
|
||||
if close_author_match:
|
||||
break
|
||||
if close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900] and thumbimage:
|
||||
if subtitle and series:
|
||||
close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
|
||||
else:
|
||||
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
|
||||
|
||||
elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
|
||||
close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
|
||||
|
||||
if close_matches:
|
||||
return close_matches[0]
|
||||
else:
|
||||
return ''
|
||||
else:
|
||||
return ''
|
||||
|
||||
def overdrive_get_record(self, br, log, q, ovrdrv_id):
|
||||
search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
|
||||
results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
|
||||
|
||||
# re-initialize the cookiejar so that it's clean
|
||||
clean_cj = mechanize.CookieJar()
|
||||
br.set_cookiejar(clean_cj)
|
||||
# get the base url to set the proper session cookie
|
||||
br.open_novisit(q)
|
||||
|
||||
# initialize the search
|
||||
self.safe_query(br, search_url)
|
||||
|
||||
# get the results
|
||||
req = mechanize.Request(results_url)
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
req.add_header('Referer', search_url)
|
||||
req.add_header('Accept', 'application/json, text/javascript, */*')
|
||||
raw = br.open_novisit(req)
|
||||
raw = str(list(raw))
|
||||
clean_cj = mechanize.CookieJar()
|
||||
br.set_cookiejar(clean_cj)
|
||||
return self.sort_ovrdrv_results(raw, log, None, None, None, ovrdrv_id)
|
||||
|
||||
|
||||
def find_ovrdrv_data(self, br, log, title, author, isbn, ovrdrv_id=None):
|
||||
q = base_url
|
||||
if ovrdrv_id is None:
|
||||
return self.overdrive_search(br, log, q, title, author)
|
||||
else:
|
||||
return self.overdrive_get_record(br, log, q, ovrdrv_id)
|
||||
|
||||
|
||||
|
||||
def to_ovrdrv_data(self, br, log, title=None, author=None, ovrdrv_id=None):
|
||||
'''
|
||||
Takes either a title/author combo or an Overdrive ID. One of these
|
||||
two must be passed to this function.
|
||||
'''
|
||||
if ovrdrv_id is not None:
|
||||
with cache_lock:
|
||||
ans = ovrdrv_data_cache.get(ovrdrv_id, None)
|
||||
if ans:
|
||||
return ans
|
||||
elif ans is False:
|
||||
return None
|
||||
else:
|
||||
ovrdrv_data = self.find_ovrdrv_data(br, log, title, author, ovrdrv_id)
|
||||
else:
|
||||
try:
|
||||
ovrdrv_data = self.find_ovrdrv_data(br, log, title, author, ovrdrv_id)
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
ovrdrv_data = None
|
||||
with cache_lock:
|
||||
ovrdrv_data_cache[ovrdrv_id] = ovrdrv_data if ovrdrv_data else False
|
||||
|
||||
return ovrdrv_data if ovrdrv_data else False
|
||||
|
||||
|
||||
    def parse_search_results(self, ovrdrv_data, mi):
        '''
        Parse the formatted search results from the initial Overdrive query and
        add the values to the metadata.

        The list object has these values:
        [cover_url[0], social_metadata_url[1], worldcatlink[2], series[3], series_num[4],
         publisher[5], creators[6], reserveid[7], title[8]]
        '''
|
||||
ovrdrv_id = ovrdrv_data[7]
|
||||
mi.set_identifier('overdrive', ovrdrv_id)
|
||||
|
||||
if len(ovrdrv_data[3]) > 1:
|
||||
mi.series = ovrdrv_data[3]
|
||||
if ovrdrv_data[4]:
|
||||
try:
|
||||
mi.series_index = float(ovrdrv_data[4])
|
||||
except:
|
||||
pass
|
||||
mi.publisher = ovrdrv_data[5]
|
||||
mi.authors = ovrdrv_data[6]
|
||||
mi.title = ovrdrv_data[8]
|
||||
cover_url = ovrdrv_data[0]
|
||||
if cover_url:
|
||||
self.cache_identifier_to_cover_url(ovrdrv_id,
|
||||
cover_url)
|
||||
|
||||
|
||||
def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
|
||||
try:
|
||||
raw = br.open_novisit(metadata_url).read()
|
||||
except Exception, e:
|
||||
if callable(getattr(e, 'getcode', None)) and \
|
||||
e.getcode() == 404:
|
||||
return False
|
||||
raise
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||
resolve_entities=True)[0]
|
||||
try:
|
||||
root = soupparser.fromstring(raw)
|
||||
except:
|
||||
return False
|
||||
|
||||
pub_date = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblPubDate']/text()")
|
||||
lang = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblLanguage']/text()")
|
||||
subjects = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblSubjects']/text()")
|
||||
ebook_isbn = root.xpath("//td/label[@id='ctl00_ContentPlaceHolder1_lblIdentifier']/text()")
|
||||
desc = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblDescription']/ancestor::div[1]")
|
||||
|
||||
if pub_date:
|
||||
from calibre.utils.date import parse_date
|
||||
try:
|
||||
mi.pubdate = parse_date(pub_date[0].strip())
|
||||
except:
|
||||
pass
|
||||
if lang:
|
||||
lang = lang[0].strip().lower()
|
||||
mi.language = {'english':'en', 'french':'fr', 'german':'de',
|
||||
'spanish':'es'}.get(lang, None)
|
||||
|
||||
if ebook_isbn:
|
||||
#print "ebook isbn is "+str(ebook_isbn[0])
|
||||
isbn = check_isbn(ebook_isbn[0].strip())
|
||||
if isbn:
|
||||
self.cache_isbn_to_identifier(isbn, ovrdrv_id)
|
||||
mi.isbn = isbn
|
||||
if subjects:
|
||||
mi.tags = [tag.strip() for tag in subjects[0].split(',')]
|
||||
|
||||
if desc:
|
||||
desc = desc[0]
|
||||
desc = html.tostring(desc, method='html', encoding=unicode).strip()
|
||||
# remove all attributes from tags
|
||||
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
||||
# Remove comments
|
||||
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
|
||||
mi.comments = sanitize_comments_html(desc)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# To run these test use:
|
||||
# calibre-debug -e src/calibre/ebooks/metadata/sources/overdrive.py
|
||||
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
||||
title_test, authors_test)
|
||||
test_identify_plugin(OverDrive.name,
|
||||
[
|
||||
|
||||
(
|
||||
{'title':'Foundation and Earth',
|
||||
'authors':['Asimov']},
|
||||
[title_test('Foundation and Earth', exact=True),
|
||||
authors_test(['Isaac Asimov'])]
|
||||
),
|
||||
|
||||
(
|
||||
{'title': 'Elephants', 'authors':['Agatha']},
|
||||
[title_test('Elephants Can Remember', exact=False),
|
||||
authors_test(['Agatha Christie'])]
|
||||
),
|
||||
])
|
@ -15,14 +15,17 @@ from calibre.customize.ui import metadata_plugins
from calibre import prints, sanitize_file_name2
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (create_log,
        get_cached_cover_urls)
        get_cached_cover_urls, msprefs)

def isbn_test(isbn):
    isbn_ = check_isbn(isbn)

    def test(mi):
        misbn = check_isbn(mi.isbn)
        return misbn and misbn == isbn_
        if misbn and misbn == isbn_:
            return True
        prints('ISBN test failed. Expected: \'%s\' found \'%s\''%(isbn_, misbn))
        return False

    return test

@ -32,8 +35,11 @@ def title_test(title, exact=False):

    def test(mi):
        mt = mi.title.lower()
        return (exact and mt == title) or \
                (not exact and title in mt)
        if (exact and mt == title) or \
                (not exact and title in mt):
            return True
        prints('Title test failed. Expected: \'%s\' found \'%s\''%(title, mt))
        return False

    return test

@ -42,7 +48,39 @@ def authors_test(authors):

    def test(mi):
        au = set([x.lower() for x in mi.authors])
        return au == authors
        if msprefs['swap_author_names']:
            def revert_to_fn_ln(a):
                if ',' not in a:
                    return a
                parts = a.split(',', 1)
                t = parts[-1]
                parts = parts[:-1]
                parts.insert(0, t)
                return ' '.join(parts)

            au = set([revert_to_fn_ln(x) for x in au])

        if au == authors:
            return True
        prints('Author test failed. Expected: \'%s\' found \'%s\''%(authors, au))
        return False

    return test

def series_test(series, series_index):
    series = series.lower()

    def test(mi):
        ms = mi.series.lower() if mi.series else ''
        if (ms == series) and (series_index == mi.series_index):
            return True
        if mi.series:
            prints('Series test failed. Expected: \'%s [%d]\' found \'%s [%d]\''% \
                    (series, series_index, ms, mi.series_index))
        else:
            prints('Series test failed. Expected: \'%s [%d]\' found no series'% \
                    (series, series_index))
        return False

    return test

@ -218,11 +256,11 @@ def test_identify_plugin(name, tests): # {{{
                '')+'-%s-cover.jpg'%sanitize_file_name2(mi.title.replace(' ',
                '_')))
            with open(cover, 'wb') as f:
                f.write(cdata)
                f.write(cdata[-1])

            prints('Cover downloaded to:', cover)

            if len(cdata) < 10240:
            if len(cdata[-1]) < 10240:
                prints('Downloaded cover too small')
                raise SystemExit(1)

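To see what the new swap_author_names handling does, here is a quick trace of revert_to_fn_ln (editor's illustration):

# revert_to_fn_ln('christie, agatha') walks through:
#   parts = ['christie', ' agatha']      # a.split(',', 1)
#   t = ' agatha'; parts = ['christie']  # peel off the first-name part
#   parts.insert(0, t)                   # -> [' agatha', 'christie']
#   ' '.join(parts)                      # -> ' agatha christie'
# i.e. 'Last, First' is flipped back to 'First Last' so the set comparison
# against the expected author names can succeed when swap_author_names is on.
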
@ -222,7 +222,7 @@ class SaveWorker(Thread):
                if isbytestring(fpath):
                    fpath = fpath.decode(filesystem_encoding)
                formats[fmt.lower()] = fpath
            data[i] = [opf, cpath, formats]
            data[i] = [opf, cpath, formats, mi.last_modified.isoformat()]
        return data

    def run(self):
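
Downstream consumers of data now get a fourth element; a minimal sketch of unpacking it (editor's illustration, variable names assumed):

# opf, cpath, formats, last_modified = data[i]
# last_modified is an ISO-8601 string (mi.last_modified.isoformat()); it can
# be parsed back into a datetime with calibre.utils.date.parse_date.
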
408
src/calibre/ebooks/mobi/debug.py
Normal file
@ -0,0 +1,408 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import struct, datetime
from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.langcodes import main_language, sub_language

class PalmDOCAttributes(object):

    class Attr(object):

        def __init__(self, name, field, val):
            self.name = name
            self.val = val & field

        def __str__(self):
            return '%s: %s'%(self.name, bool(self.val))

    def __init__(self, raw):
        self.val = struct.unpack(b'<H', raw)[0]
        self.attributes = []
        for name, field in [('Read Only', 0x02), ('Dirty AppInfoArea', 0x04),
                ('Backup this database', 0x08),
                ('Okay to install newer over existing copy, if present on PalmPilot', 0x10),
                ('Force the PalmPilot to reset after this database is installed', 0x12),
                ('Don\'t allow copy of file to be beamed to other Pilot',
                    0x14)]:
            self.attributes.append(PalmDOCAttributes.Attr(name, field,
                self.val))

    def __str__(self):
        attrs = '\n\t'.join([str(x) for x in self.attributes])
        return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
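
# Editor's illustration (not part of the original file): raw bytes
# b'\x08\x00' unpack little-endian to val == 0x08, so only the 'Backup this
# database' attribute tests True and str() reports it as such, with every
# other attribute False.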

class PalmDB(object):

    def __init__(self, raw):
        self.raw = raw

        if self.raw.startswith(b'TPZ'):
            raise ValueError('This is a Topaz file')

        self.name = self.raw[:32].replace(b'\x00', b'')
        self.attributes = PalmDOCAttributes(self.raw[32:34])
        self.version = struct.unpack(b'>H', self.raw[34:36])[0]

        palm_epoch = datetime.datetime(1904, 1, 1, tzinfo=utc_tz)
        self.creation_date_raw = struct.unpack(b'>I', self.raw[36:40])[0]
        self.creation_date = (palm_epoch +
                datetime.timedelta(seconds=self.creation_date_raw))
        self.modification_date_raw = struct.unpack(b'>I', self.raw[40:44])[0]
        self.modification_date = (palm_epoch +
                datetime.timedelta(seconds=self.modification_date_raw))
        self.last_backup_date_raw = struct.unpack(b'>I', self.raw[44:48])[0]
        self.last_backup_date = (palm_epoch +
                datetime.timedelta(seconds=self.last_backup_date_raw))
        self.modification_number = struct.unpack(b'>I', self.raw[48:52])[0]
        self.app_info_id = self.raw[52:56]
        self.sort_info_id = self.raw[56:60]
        self.type = self.raw[60:64]
        self.creator = self.raw[64:68]
        self.ident = self.type + self.creator
        if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
            raise ValueError('Unknown book ident: %r'%self.ident)
        self.uid_seed = self.raw[68:72]
        self.next_rec_list_id = self.raw[72:76]

        self.number_of_records, = struct.unpack(b'>H', self.raw[76:78])

    def __str__(self):
        ans = ['*'*20 + ' PalmDB Header '+ '*'*20]
        ans.append('Name: %r'%self.name)
        ans.append(str(self.attributes))
        ans.append('Version: %s'%self.version)
        ans.append('Creation date: %s (%s)'%(self.creation_date.isoformat(),
            self.creation_date_raw))
        ans.append('Modification date: %s (%s)'%(self.modification_date.isoformat(),
            self.modification_date_raw))
        ans.append('Backup date: %s (%s)'%(self.last_backup_date.isoformat(),
            self.last_backup_date_raw))
        ans.append('Modification number: %s'%self.modification_number)
        ans.append('App Info ID: %r'%self.app_info_id)
        ans.append('Sort Info ID: %r'%self.sort_info_id)
        ans.append('Type: %r'%self.type)
        ans.append('Creator: %r'%self.creator)
        ans.append('UID seed: %r'%self.uid_seed)
        ans.append('Next record list id: %r'%self.next_rec_list_id)
        ans.append('Number of records: %s'%self.number_of_records)

        return '\n'.join(ans)
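
# Byte map of the 78-byte PalmDB header parsed above (editor's summary):
#   0-32  name                        48-52 modification number
#   32-34 attributes                  52-56 app info id
#   34-36 version                     56-60 sort info id
#   36-40 creation date               60-64 type
#   40-44 modification date           64-68 creator
#   44-48 last backup date            68-72 uid seed
#   (dates are seconds since the      72-76 next record list id
#   1904-01-01 UTC Palm epoch)        76-78 number of records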

class Record(object):

    def __init__(self, raw, header):
        self.offset, self.flags, self.uid = header
        self.raw = raw

    @property
    def header(self):
        return 'Offset: %d Flags: %d UID: %d'%(self.offset, self.flags,
                self.uid)

class EXTHRecord(object):

    def __init__(self, type_, data):
        self.type = type_
        self.data = data
        self.name = {
                1 : 'DRM Server id',
                2 : 'DRM Commerce id',
                3 : 'DRM ebookbase book id',
                100 : 'author',
                101 : 'publisher',
                102 : 'imprint',
                103 : 'description',
                104 : 'isbn',
                105 : 'subject',
                106 : 'publishingdate',
                107 : 'review',
                108 : 'contributor',
                109 : 'rights',
                110 : 'subjectcode',
                111 : 'type',
                112 : 'source',
                113 : 'asin',
                114 : 'versionnumber',
                115 : 'sample',
                116 : 'startreading',
                117 : 'adult',
                118 : 'retailprice',
                119 : 'retailpricecurrency',
                201 : 'coveroffset',
                202 : 'thumboffset',
                203 : 'hasfakecover',
                204 : 'Creator Software',
                205 : 'Creator Major Version', # '>I'
                206 : 'Creator Minor Version', # '>I'
                207 : 'Creator Build Number', # '>I'
                208 : 'watermark',
                209 : 'tamper_proof_keys',
                300 : 'fontsignature',
                301 : 'clippinglimit', # percentage '>B'
                402 : 'publisherlimit',
                404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
                501 : 'cdetype', # 4 chars (PDOC or EBOK)
                502 : 'lastupdatetime',
                503 : 'updatedtitle',
        }.get(self.type, repr(self.type))

        if self.name in ('coveroffset', 'thumboffset', 'hasfakecover',
                'Creator Major Version', 'Creator Minor Version',
                'Creator Build Number', 'Creator Software', 'startreading'):
            self.data, = struct.unpack(b'>I', self.data)

    def __str__(self):
        return '%s (%d): %r'%(self.name, self.type, self.data)
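
# Editor's illustration (not part of the original file): an author record
# arrives as type_ == 100 with, say, data == b'Isaac Asimov'; since 100 is
# not in the '>I' group above, the payload stays raw bytes and
# str(EXTHRecord(100, b'Isaac Asimov')) gives "author (100): 'Isaac Asimov'".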

class EXTHHeader(object):

    def __init__(self, raw):
        self.raw = raw
        if not self.raw.startswith(b'EXTH'):
            raise ValueError('EXTH header does not start with EXTH')
        self.length, = struct.unpack(b'>I', self.raw[4:8])
        self.count, = struct.unpack(b'>I', self.raw[8:12])

        pos = 12
        self.records = []
        for i in xrange(self.count):
            pos = self.read_record(pos)

    def read_record(self, pos):
        type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
        data = self.raw[(pos+8):(pos+length)]
        self.records.append(EXTHRecord(type_, data))
        return pos + length

    def __str__(self):
        ans = ['*'*20 + ' EXTH Header '+ '*'*20]
        ans.append('EXTH header length: %d'%self.length)
        ans.append('Number of EXTH records: %d'%self.count)
        ans.append('EXTH records...')
        for r in self.records:
            ans.append(str(r))
        return '\n'.join(ans)
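
# Wire format consumed by read_record() above (editor's summary): the block
# is the ASCII tag 'EXTH', a 4-byte big-endian total length, a 4-byte record
# count, then count records, each a 4-byte type, a 4-byte length that
# includes these 8 header bytes, and length-8 bytes of payload.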

class MOBIHeader(object):

    def __init__(self, record0):
        self.raw = record0.raw

        self.compression_raw = self.raw[:2]
        self.compression = {1: 'No compression', 2: 'PalmDoc compression',
                17480: 'HUFF/CDIC compression'}.get(struct.unpack(b'>H',
                    self.compression_raw)[0],
                    repr(self.compression_raw))
        self.unused = self.raw[2:4]
        self.text_length, = struct.unpack(b'>I', self.raw[4:8])
        self.number_of_text_records, self.text_record_size = \
                struct.unpack(b'>HH', self.raw[8:12])
        self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
        self.encryption_type = {0: 'No encryption',
                1: 'Old mobipocket encryption',
                2:'Mobipocket encryption'}.get(self.encryption_type_raw,
                repr(self.encryption_type_raw))
        self.unknown = self.raw[14:16]

        self.identifier = self.raw[16:20]
        if self.identifier != b'MOBI':
            raise ValueError('Identifier %r unknown'%self.identifier)

        self.length, = struct.unpack(b'>I', self.raw[20:24])
        self.type_raw, = struct.unpack(b'>I', self.raw[24:28])
        self.type = {
                2 : 'Mobipocket book',
                3 : 'PalmDOC book',
                4 : 'Audio',
                257 : 'News',
                258 : 'News Feed',
                259 : 'News magazine',
                513 : 'PICS',
                514 : 'Word',
                515 : 'XLS',
                516 : 'PPT',
                517 : 'TEXT',
                518 : 'HTML',
            }.get(self.type_raw, repr(self.type_raw))

        self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
        self.encoding = {
                1252 : 'cp1252',
                65001: 'utf-8',
            }.get(self.encoding_raw, repr(self.encoding_raw))
        self.uid = self.raw[32:36]
        self.file_version, = struct.unpack(b'>I', self.raw[36:40])
        self.reserved = self.raw[40:48]
        self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52])
        self.reserved2 = self.raw[52:80]
        self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84])
        self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88])
        self.fullname_length, = struct.unpack(b'>I', self.raw[88:92])
        self.locale_raw, = struct.unpack(b'>I', self.raw[92:96])
        langcode = self.locale_raw
        langid = langcode & 0xFF
        sublangid = (langcode >> 10) & 0xFF
        self.language = main_language.get(langid, 'ENGLISH')
        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')

        self.input_language = self.raw[96:100]
        self.output_language = self.raw[100:104]
        self.min_version, = struct.unpack(b'>I', self.raw[104:108])
        self.first_image_index, = struct.unpack(b'>I', self.raw[108:112])
        self.huffman_record_offset, = struct.unpack(b'>I', self.raw[112:116])
        self.huffman_record_count, = struct.unpack(b'>I', self.raw[116:120])
        self.unknown2 = self.raw[120:128]
        self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
        self.has_exth = bool(self.exth_flags & 0x40)
        self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
        if self.has_drm_data:
            self.unknown3 = self.raw[132:164]
            self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
            self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
            self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
            self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])
        self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
        self.has_fcis_flis = False
        if self.has_extra_data_flags:
            self.unknown4 = self.raw[180:192]
            self.first_content_record, self.last_content_record = \
                    struct.unpack(b'>HH', self.raw[192:196])
            self.unknown5, = struct.unpack(b'>I', self.raw[196:200])
            (self.fcis_number, self.fcis_count, self.flis_number,
                    self.flis_count) = struct.unpack(b'>IIII',
                            self.raw[200:216])
            self.unknown6 = self.raw[216:240]
            self.extra_data_flags = bin(struct.unpack(b'>I',
                self.raw[240:244])[0])
            self.primary_index_record, = struct.unpack(b'>I',
                    self.raw[244:248])

        if self.has_exth:
            self.exth_offset = 16 + self.length

            self.exth = EXTHHeader(self.raw[self.exth_offset:])

            self.end_of_exth = self.exth_offset + self.exth.length
            self.bytes_after_exth = self.fullname_offset - self.end_of_exth

    def __str__(self):
        ans = ['*'*20 + ' MOBI Header '+ '*'*20]
        ans.append('Compression: %s'%self.compression)
        ans.append('Unused: %r'%self.unused)
        ans.append('Number of text records: %d'%self.number_of_text_records)
        ans.append('Text record size: %d'%self.text_record_size)
        ans.append('Encryption: %s'%self.encryption_type)
        ans.append('Unknown: %r'%self.unknown)
        ans.append('Identifier: %r'%self.identifier)
        ans.append('Header length: %d'% self.length)
        ans.append('Type: %s'%self.type)
        ans.append('Encoding: %s'%self.encoding)
        ans.append('UID: %r'%self.uid)
        ans.append('File version: %d'%self.file_version)
        ans.append('Reserved: %r'%self.reserved)
        ans.append('Secondary index record: %d (null val: %d)'%(
            self.secondary_index_record, 0xffffffff))
        ans.append('Reserved2: %r'%self.reserved2)
        ans.append('First non-book record: %d'% self.first_non_book_record)
        ans.append('Full name offset: %d'%self.fullname_offset)
        ans.append('Full name length: %d bytes'%self.fullname_length)
        ans.append('Langcode: %r'%self.locale_raw)
        ans.append('Language: %s'%self.language)
        ans.append('Sub language: %s'%self.sublanguage)
        ans.append('Input language: %r'%self.input_language)
        ans.append('Output language: %r'%self.output_language)
        ans.append('Min version: %d'%self.min_version)
        ans.append('First Image index: %d'%self.first_image_index)
        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
        ans.append('Huffman record count: %d'%self.huffman_record_count)
        ans.append('Unknown2: %r'%self.unknown2)
        ans.append('EXTH flags: %r (%s)'%(self.exth_flags, self.has_exth))
        if self.has_drm_data:
            ans.append('Unknown3: %r'%self.unknown3)
            ans.append('DRM Offset: %s'%self.drm_offset)
            ans.append('DRM Count: %s'%self.drm_count)
            ans.append('DRM Size: %s'%self.drm_size)
            ans.append('DRM Flags: %r'%self.drm_flags)
        if self.has_extra_data_flags:
            ans.append('Unknown4: %r'%self.unknown4)
            ans.append('First content record: %d'% self.first_content_record)
            ans.append('Last content record: %d'% self.last_content_record)
            ans.append('Unknown5: %d'% self.unknown5)
            ans.append('FCIS number: %d'% self.fcis_number)
            ans.append('FCIS count: %d'% self.fcis_count)
            ans.append('FLIS number: %d'% self.flis_number)
            ans.append('FLIS count: %d'% self.flis_count)
            ans.append('Unknown6: %r'% self.unknown6)
            ans.append('Extra data flags: %r'%self.extra_data_flags)
            ans.append('Primary index record: %d'%self.primary_index_record)

        ans = '\n'.join(ans)

        if self.has_exth:
            ans += '\n\n' + str(self.exth)
            ans += '\n\nBytes after EXTH: %d'%self.bytes_after_exth

        ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
            self.fullname_length))

        ans += '\nRecord 0 length: %d'%len(self.raw)
        return ans
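
# Editor's illustration (not part of the original file): locale_raw == 9
# decodes to langid 9 -> 'ENGLISH' and sublangid 0 -> 'NEUTRAL', since
# 9 & 0xFF == 9 and (9 >> 10) & 0xFF == 0.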

class MOBIFile(object):

    def __init__(self, stream):
        self.raw = stream.read()

        self.palmdb = PalmDB(self.raw[:78])

        self.record_headers = []
        self.records = []
        for i in xrange(self.palmdb.number_of_records):
            pos = 78 + i * 8
            offset, a1, a2, a3, a4 = struct.unpack(b'>LBBBB', self.raw[pos:pos+8])
            flags, val = a1, a2 << 16 | a3 << 8 | a4
            self.record_headers.append((offset, flags, val))

        def section(section_number):
            if section_number == self.palmdb.number_of_records - 1:
                end_off = len(self.raw)
            else:
                end_off = self.record_headers[section_number + 1][0]
            off = self.record_headers[section_number][0]
            return self.raw[off:end_off]

        for i in range(self.palmdb.number_of_records):
            self.records.append(Record(section(i), self.record_headers[i]))

        self.mobi_header = MOBIHeader(self.records[0])


    def print_header(self):
        print (str(self.palmdb).encode('utf-8'))
        print ()
        print ('Record headers:')
        for i, r in enumerate(self.records):
            print ('%6d. %s'%(i, r.header))

        print ()
        print (str(self.mobi_header).encode('utf-8'))

def inspect_mobi(path_or_stream):
    stream = (path_or_stream if hasattr(path_or_stream, 'read') else
            open(path_or_stream, 'rb'))
    f = MOBIFile(stream)
    f.print_header()

if __name__ == '__main__':
    import sys
    f = MOBIFile(open(sys.argv[1], 'rb'))
    f.print_header()
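
For orientation, a minimal sketch of driving the new inspector from Python (the file name is illustrative):

from calibre.ebooks.mobi.debug import inspect_mobi

# Accepts a path or an open binary stream; prints the PalmDB header, the
# per-record headers and the MOBI/EXTH headers to stdout.
inspect_mobi('book.mobi')
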
@ -463,9 +463,9 @@ class MobiMLizer(object):
            text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top',
                'text-bottom') or (
                'text-bottom', 'top', 'bottom') or (
            isinstance(valign, (float, int)) and abs(valign) != 0)
        issup = valign in ('super', 'text-top') or (
        issup = valign in ('super', 'text-top', 'top') or (
            isinstance(valign, (float, int)) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
@ -484,6 +484,7 @@ class MobiMLizer(object):
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
                vtag = etree.SubElement(vtag, XHTML('small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
@ -495,6 +496,10 @@ class MobiMLizer(object):
                        vtag.append(child)
                return

        if tag == 'blockquote':
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS:
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
@ -510,6 +515,8 @@ class MobiMLizer(object):
        if tail:
            self.mobimlize_content(tag, tail, bstate, istates)

        if tag == 'blockquote':
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
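
To make the vertical-align additions concrete (editor's illustration of the mapping that results):

# 'vertical-align: top'    -> not_baseline True, issup True  -> <sup><small>
# 'vertical-align: bottom' -> not_baseline True, issup False -> <sub><small>
# numeric values behave as before: > 0 selects sup, otherwise sub
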
@ -20,7 +20,7 @@ from calibre.utils.filenames import ascii_filename
from calibre.utils.date import parse_date
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError
from calibre.ebooks import DRMError, unit_convert
from calibre.ebooks.chardet import ENCODING_PATS
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
@ -258,6 +258,8 @@ class MobiReader(object):
        }
        ''')
        self.tag_css_rules = {}
        self.left_margins = {}
        self.text_indents = {}

        if hasattr(filename_or_stream, 'read'):
            stream = filename_or_stream
@ -567,9 +569,21 @@ class MobiReader(object):
            elif tag.tag == 'img':
                tag.set('width', width)
            else:
                styles.append('text-indent: %s' % self.ensure_unit(width))
                ewidth = self.ensure_unit(width)
                styles.append('text-indent: %s' % ewidth)
                try:
                    ewidth_val = unit_convert(ewidth, 12, 500, 166)
                    self.text_indents[tag] = ewidth_val
                except:
                    pass
                if width.startswith('-'):
                    styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
                    try:
                        ewidth_val = unit_convert(ewidth[1:], 12, 500, 166)
                        self.left_margins[tag] = ewidth_val
                    except:
                        pass

        if attrib.has_key('align'):
            align = attrib.pop('align').strip()
            if align:
@ -661,6 +675,26 @@ class MobiReader(object):
            if hasattr(parent, 'remove'):
                parent.remove(tag)

    def get_left_whitespace(self, tag):

        def whitespace(tag):
            lm = ti = 0.0
            if tag.tag == 'p':
                ti = unit_convert('1.5em', 12, 500, 166)
            if tag.tag == 'blockquote':
                lm = unit_convert('2em', 12, 500, 166)
            lm = self.left_margins.get(tag, lm)
            ti = self.text_indents.get(tag, ti)
            return lm + ti

        parent = tag
        ans = 0.0
        while parent is not None:
            ans += whitespace(parent)
            parent = parent.getparent()

        return ans

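How get_left_whitespace() feeds the TOC levelling (editor's note, grounded in the code above):

# For a <p> nested inside a <blockquote>, whitespace() credits the <p> with
# its default 1.5em text-indent (or an override recorded in
# self.text_indents) and the enclosing <blockquote> with its default 2em
# left margin (or an override from self.left_margins); the while-loop sums
# these up the ancestor chain, and create_opf() stores the truncated int on
# each TOC item as left_space.
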
    def create_opf(self, htmlfile, guide=None, root=None):
        mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
        if mi is None:
@ -716,6 +750,7 @@ class MobiReader(object):
        ent_pat = re.compile(r'&(\S+?);')
        if elems:
            tocobj = TOC()
            found = False
            reached = False
            for x in root.iter():
                if x == elems[-1]:
@ -730,15 +765,45 @@ class MobiReader(object):
                    except:
                        text = ''
                    text = ent_pat.sub(entity_to_unicode, text)
                    tocobj.add_item(toc.partition('#')[0], href[1:],
                    item = tocobj.add_item(toc.partition('#')[0], href[1:],
                            text)
                if reached and x.get('class', None) == 'mbp_pagebreak':
                    item.left_space = int(self.get_left_whitespace(x))
                    found = True
                if reached and found and x.get('class', None) == 'mbp_pagebreak':
                    break
            if tocobj is not None:
                tocobj = self.structure_toc(tocobj)
                opf.set_toc(tocobj)

        return opf, ncx_manifest_entry

    def structure_toc(self, toc):
        indent_vals = set()
        for item in toc:
            indent_vals.add(item.left_space)
        if len(indent_vals) > 6 or len(indent_vals) < 2:
            # Too many or too few levels, give up
            return toc
        indent_vals = sorted(indent_vals)

        last_found = [None for i in indent_vals]

        newtoc = TOC()

        def find_parent(level):
            candidates = last_found[:level]
            for x in reversed(candidates):
                if x is not None:
                    return x
            return newtoc

        for item in toc:
            level = indent_vals.index(item.left_space)
            parent = find_parent(level)
            last_found[level] = parent.add_item(item.href, item.fragment,
                    item.text)

        return newtoc

    def sizeof_trailing_entries(self, data):
        def sizeof_trailing_entry(ptr, psize):
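
A worked trace of the new structure_toc() pass (editor's sketch; the left_space values are invented for illustration):

# Flat TOC entries with left_space values [0, 150, 150, 0, 150, 300]:
# indent_vals == [0, 150, 300]; between 2 and 6 distinct values, so the
# multi-level rebuild proceeds with three levels.
#   entry 1 (0)   -> level 0, no earlier parent -> child of newtoc
#   entry 2 (150) -> level 1 -> child of entry 1
#   entry 3 (150) -> level 1 -> child of entry 1
#   entry 4 (0)   -> level 0 -> child of newtoc
#   entry 5 (150) -> level 1, last_found[0] is now entry 4 -> child of entry 4
#   entry 6 (300) -> level 2, nearest filled lower level is entry 5
#                    -> child of entry 5
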
@ -7,8 +7,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com> and \
    Kovid Goyal <kovid@kovidgoyal.net>'

from collections import defaultdict
from itertools import count
from itertools import izip
import random
import re
from struct import pack
@ -310,10 +308,11 @@ class Serializer(object):
            if href not in id_offsets:
                self.logger.warn('Hyperlink target %r not found' % href)
                href, _ = urldefrag(href)
            ioff = self.id_offsets[href]
            for hoff in hoffs:
                buffer.seek(hoff)
                buffer.write('%010d' % ioff)
            if href in self.id_offsets:
                ioff = self.id_offsets[href]
                for hoff in hoffs:
                    buffer.seek(hoff)
                    buffer.write('%010d' % ioff)

class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
@ -1510,7 +1509,7 @@ class MobiWriter(object):
        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
        self._records[0] = record0 + ('\0' * (2452 - len(record0)))
        self._records[0] = record0 + ('\0' * (1024*8))

    def _build_exth(self):
        oeb = self._oeb
@ -1629,8 +1628,8 @@ class MobiWriter(object):
        self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
            'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
        offset = self._tell() + (8 * nrecords) + 2
        for id, record in izip(count(), self._records):
            self._write(pack('>I', offset), '\0', pack('>I', id)[1:])
        for i, record in enumerate(self._records):
            self._write(pack('>I', offset), '\0', pack('>I', 2*i)[1:])
            offset += len(record)
        self._write('\0\0')

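Effect of the record 0 padding change above (editor's note):

# Before: record 0 was null-padded only up to a fixed size of 2452 bytes.
# After:  exactly 8KB of null bytes ('\0' * (1024*8)) is appended after
#         record 0 regardless of its length.
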
@ -8,23 +8,18 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'

import os, re, uuid, logging
from mimetypes import types_map
from collections import defaultdict
from itertools import count
from urlparse import urldefrag, urlparse, urlunparse, urljoin
from urllib import unquote as urlunquote

from lxml import etree, html
from cssutils import CSSParser, parseString, parseStyle, replaceUrls
from cssutils.css import CSSRule

import calibre
from calibre.constants import filesystem_encoding
from calibre.constants import filesystem_encoding, __version__
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
from calibre import isbytestring
from calibre import isbytestring, as_unicode, get_types_map

RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True)

@ -179,6 +174,9 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
    If the ``link_repl_func`` returns None, the attribute or
    tag text will be removed completely.
    '''
    from cssutils import parseString, parseStyle, replaceUrls, log
    log.setLevel(logging.WARN)

    if resolve_base_href:
        resolve_base_href(root)
    for el, attrib, link, pos in iterlinks(root, find_links_in_css=False):
@ -248,7 +246,7 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
            el.attrib['style'] = repl



types_map = get_types_map()
EPUB_MIME = types_map['.epub']
XHTML_MIME = types_map['.xhtml']
CSS_MIME = types_map['.css']
@ -448,22 +446,23 @@ class NullContainer(object):
class DirContainer(object):
    """Filesystem directory container."""

    def __init__(self, path, log):
    def __init__(self, path, log, ignore_opf=False):
        self.log = log
        if isbytestring(path):
            path = path.decode(filesystem_encoding)
        self.opfname = None
        ext = os.path.splitext(path)[1].lower()
        if ext == '.opf':
            self.opfname = os.path.basename(path)
            self.rootdir = os.path.dirname(path)
            return
        self.rootdir = path
        for path in self.namelist():
            ext = os.path.splitext(path)[1].lower()
            if ext == '.opf':
                self.opfname = path
                return
        self.opfname = None
        if not ignore_opf:
            for path in self.namelist():
                ext = os.path.splitext(path)[1].lower()
                if ext == '.opf':
                    self.opfname = path
                    return

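A short sketch of the new ignore_opf switch (editor's illustration; the path and log arguments are placeholders):

# container = DirContainer('/path/to/book/', log, ignore_opf=True)
# skips the directory scan for an OPF entirely, so container.opfname stays
# None unless the path itself pointed at a .opf file.
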
    def read(self, path):
        if path is None:
@ -643,7 +642,7 @@ class Metadata(object):
        return unicode(self.value).encode('ascii', 'xmlcharrefreplace')

    def __unicode__(self):
        return unicode(self.value)
        return as_unicode(self.value)

    def to_opf1(self, dcmeta=None, xmeta=None, nsrmap={}):
        attrib = {}
@ -1075,7 +1074,9 @@ class Manifest(object):


    def _parse_css(self, data):

        from cssutils.css import CSSRule
        from cssutils import CSSParser, log
        log.setLevel(logging.WARN)
        def get_style_rules_from_import(import_rule):
            ans = []
            if not import_rule.styleSheet:
@ -2011,7 +2012,7 @@ class OEBBook(object):
            name='dtb:uid', content=unicode(self.uid))
        etree.SubElement(head, NCX('meta'),
            name='dtb:depth', content=str(self.toc.depth()))
        generator = ''.join(['calibre (', calibre.__version__, ')'])
        generator = ''.join(['calibre (', __version__, ')'])
        etree.SubElement(head, NCX('meta'),
            name='dtb:generator', content=generator)
        etree.SubElement(head, NCX('meta'),
@ -1,75 +0,0 @@
'''
Device profiles.
'''

__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'

from itertools import izip

FONT_SIZES = [('xx-small', 1),
              ('x-small', None),
              ('small', 2),
              ('medium', 3),
              ('large', 4),
              ('x-large', 5),
              ('xx-large', 6),
              (None, 7)]


class Profile(object):
    def __init__(self, width, height, dpi, fbase, fsizes):
        self.width = (float(width) / dpi) * 72.
        self.height = (float(height) / dpi) * 72.
        self.dpi = float(dpi)
        self.fbase = float(fbase)
        self.fsizes = []
        for (name, num), size in izip(FONT_SIZES, fsizes):
            self.fsizes.append((name, num, float(size)))
        self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
        self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)


PROFILES = {
    'PRS505':
        Profile(width=584, height=754, dpi=168.451, fbase=12,
                fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),

    'MSReader':
        Profile(width=480, height=652, dpi=96, fbase=13,
                fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),

    # Not really, but let's pretend
    'Mobipocket':
        Profile(width=600, height=800, dpi=96, fbase=18,
                fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),

    # No clue on usable screen size; DPI should be good
    'HanlinV3':
        Profile(width=584, height=754, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    'CybookG3':
        Profile(width=600, height=800, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    'Kindle':
        Profile(width=525, height=640, dpi=168.451, fbase=16,
                fsizes=[12, 12, 14, 16, 18, 20, 22, 24]),

    'Browser':
        Profile(width=800, height=600, dpi=100.0, fbase=12,
                fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
}


class Context(object):
    PROFILES = PROFILES

    def __init__(self, source, dest):
        if source in PROFILES:
            source = PROFILES[source]
        if dest in PROFILES:
            dest = PROFILES[dest]
        self.source = source
        self.dest = dest